-rw-r--r--  Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt  2
-rw-r--r--  Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt  18
-rw-r--r--  Documentation/x86/x86_64/mm.txt  24
-rw-r--r--  Makefile  2
-rw-r--r--  arch/arm/lib/csumpartialcopyuser.S  4
-rw-r--r--  arch/arm64/kvm/hyp/debug-sr.c  3
-rw-r--r--  arch/parisc/boot/compressed/misc.c  4
-rw-r--r--  arch/parisc/include/asm/thread_info.h  5
-rw-r--r--  arch/parisc/kernel/entry.S  12
-rw-r--r--  arch/parisc/kernel/hpmc.S  1
-rw-r--r--  arch/parisc/kernel/unwind.c  1
-rw-r--r--  arch/parisc/lib/delay.c  2
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h  5
-rw-r--r--  arch/powerpc/kernel/process.c  2
-rw-r--r--  arch/powerpc/kvm/book3s_xive.c  7
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c  6
-rw-r--r--  arch/powerpc/perf/core-book3s.c  8
-rw-r--r--  arch/powerpc/perf/imc-pmu.c  17
-rw-r--r--  arch/s390/net/bpf_jit_comp.c  11
-rw-r--r--  arch/sparc/mm/fault_32.c  2
-rw-r--r--  arch/sparc/mm/fault_64.c  2
-rw-r--r--  arch/sparc/net/bpf_jit_comp_64.c  6
-rw-r--r--  arch/um/include/asm/mmu_context.h  3
-rw-r--r--  arch/um/kernel/trap.c  2
-rw-r--r--  arch/unicore32/include/asm/mmu_context.h  5
-rw-r--r--  arch/x86/Kconfig  3
-rw-r--r--  arch/x86/entry/entry_32.S  14
-rw-r--r--  arch/x86/entry/entry_64.S  189
-rw-r--r--  arch/x86/entry/entry_64_compat.S  7
-rw-r--r--  arch/x86/entry/vsyscall/vsyscall_64.c  38
-rw-r--r--  arch/x86/include/asm/cpu_entry_area.h  68
-rw-r--r--  arch/x86/include/asm/cpufeature.h  2
-rw-r--r--  arch/x86/include/asm/desc.h  12
-rw-r--r--  arch/x86/include/asm/espfix.h  7
-rw-r--r--  arch/x86/include/asm/fixmap.h  7
-rw-r--r--  arch/x86/include/asm/hypervisor.h  25
-rw-r--r--  arch/x86/include/asm/invpcid.h  53
-rw-r--r--  arch/x86/include/asm/irqflags.h  3
-rw-r--r--  arch/x86/include/asm/kdebug.h  1
-rw-r--r--  arch/x86/include/asm/mmu.h  4
-rw-r--r--  arch/x86/include/asm/mmu_context.h  54
-rw-r--r--  arch/x86/include/asm/paravirt.h  9
-rw-r--r--  arch/x86/include/asm/pgtable_32_types.h  15
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h  47
-rw-r--r--  arch/x86/include/asm/processor.h  59
-rw-r--r--  arch/x86/include/asm/stacktrace.h  3
-rw-r--r--  arch/x86/include/asm/switch_to.h  8
-rw-r--r--  arch/x86/include/asm/thread_info.h  2
-rw-r--r--  arch/x86/include/asm/tlbflush.h  136
-rw-r--r--  arch/x86/include/asm/traps.h  1
-rw-r--r--  arch/x86/include/asm/unwind.h  7
-rw-r--r--  arch/x86/kernel/asm-offsets.c  6
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c  9
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c  4
-rw-r--r--  arch/x86/kernel/cpu/common.c  96
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c  13
-rw-r--r--  arch/x86/kernel/doublefault.c  36
-rw-r--r--  arch/x86/kernel/dumpstack.c  75
-rw-r--r--  arch/x86/kernel/dumpstack_32.c  6
-rw-r--r--  arch/x86/kernel/dumpstack_64.c  12
-rw-r--r--  arch/x86/kernel/ioport.c  2
-rw-r--r--  arch/x86/kernel/irq.c  12
-rw-r--r--  arch/x86/kernel/irq_64.c  4
-rw-r--r--  arch/x86/kernel/ldt.c  47
-rw-r--r--  arch/x86/kernel/paravirt_patch_64.c  2
-rw-r--r--  arch/x86/kernel/process.c  19
-rw-r--r--  arch/x86/kernel/process_32.c  2
-rw-r--r--  arch/x86/kernel/process_64.c  14
-rw-r--r--  arch/x86/kernel/smpboot.c  6
-rw-r--r--  arch/x86/kernel/traps.c  75
-rw-r--r--  arch/x86/kernel/unwind_orc.c  88
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S  9
-rw-r--r--  arch/x86/kvm/emulate.c  32
-rw-r--r--  arch/x86/kvm/mmu.c  8
-rw-r--r--  arch/x86/kvm/vmx.c  2
-rw-r--r--  arch/x86/kvm/x86.c  48
-rw-r--r--  arch/x86/lib/delay.c  4
-rw-r--r--  arch/x86/mm/Makefile  2
-rw-r--r--  arch/x86/mm/cpu_entry_area.c  139
-rw-r--r--  arch/x86/mm/dump_pagetables.c  98
-rw-r--r--  arch/x86/mm/fault.c  2
-rw-r--r--  arch/x86/mm/init_32.c  6
-rw-r--r--  arch/x86/mm/kasan_init_64.c  23
-rw-r--r--  arch/x86/mm/pgtable_32.c  1
-rw-r--r--  arch/x86/mm/tlb.c  10
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c  2
-rw-r--r--  arch/x86/power/cpu.c  16
-rw-r--r--  arch/x86/xen/enlighten.c  81
-rw-r--r--  arch/x86/xen/enlighten_pv.c  5
-rw-r--r--  arch/x86/xen/mmu_pv.c  14
-rw-r--r--  arch/x86/xen/setup.c  6
-rw-r--r--  block/bio.c  2
-rw-r--r--  block/blk-map.c  38
-rw-r--r--  block/blk-throttle.c  8
-rw-r--r--  block/bounce.c  6
-rw-r--r--  block/kyber-iosched.c  37
-rw-r--r--  crypto/af_alg.c  6
-rw-r--r--  crypto/algif_aead.c  16
-rw-r--r--  crypto/algif_skcipher.c  16
-rw-r--r--  crypto/mcryptd.c  23
-rw-r--r--  crypto/skcipher.c  10
-rw-r--r--  drivers/acpi/apei/erst.c  2
-rw-r--r--  drivers/acpi/cppc_acpi.c  2
-rw-r--r--  drivers/acpi/nfit/core.c  9
-rw-r--r--  drivers/block/null_blk.c  4
-rw-r--r--  drivers/clk/clk.c  5
-rw-r--r--  drivers/clk/sunxi/clk-sun9i-mmc.c  12
-rw-r--r--  drivers/cpufreq/cpufreq_governor.c  19
-rw-r--r--  drivers/cpufreq/imx6q-cpufreq.c  11
-rw-r--r--  drivers/gpio/gpio-reg.c  4
-rw-r--r--  drivers/gpio/gpiolib-acpi.c  2
-rw-r--r--  drivers/gpio/gpiolib-devprop.c  17
-rw-r--r--  drivers/gpio/gpiolib-of.c  3
-rw-r--r--  drivers/gpio/gpiolib.h  3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  13
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h  2
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c  51
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h  1
-rw-r--r--  drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c  9
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc_link.c  4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c  26
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c  9
-rw-r--r--  drivers/gpu/drm/drm_lease.c  22
-rw-r--r--  drivers/gpu/drm/drm_plane.c  42
-rw-r--r--  drivers/gpu/drm/drm_syncobj.c  77
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  9
-rw-r--r--  drivers/gpu/drm/i915/i915_sw_fence.c  3
-rw-r--r--  drivers/gpu/drm/i915/intel_breadcrumbs.c  22
-rw-r--r--  drivers/gpu/drm/i915/intel_ddi.c  4
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c  3
-rw-r--r--  drivers/gpu/drm/i915/intel_lpe_audio.c  2
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_bo.c  5
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_drv.h  11
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_fbcon.c  2
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_mem.c  6
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_ttm.c  39
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_vmm.c  2
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/engine/device/base.c  2
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c  9
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c  2
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c  7
-rw-r--r--  drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c  20
-rw-r--r--  drivers/gpu/drm/sun4i/sun4i_tcon.c  4
-rw-r--r--  drivers/gpu/drm/ttm/ttm_page_alloc.c  3
-rw-r--r--  drivers/hwmon/hwmon.c  21
-rw-r--r--  drivers/mfd/cros_ec_spi.c  53
-rw-r--r--  drivers/mfd/twl4030-audio.c  9
-rw-r--r--  drivers/mfd/twl6040.c  12
-rw-r--r--  drivers/mtd/mtdcore.c  2
-rw-r--r--  drivers/mtd/nand/brcmnand/brcmnand.c  2
-rw-r--r--  drivers/mtd/nand/gpio.c  6
-rw-r--r--  drivers/mtd/nand/gpmi-nand/gpmi-nand.c  6
-rw-r--r--  drivers/net/ethernet/arc/emac.h  2
-rw-r--r--  drivers/net/ethernet/arc/emac_main.c  164
-rw-r--r--  drivers/net/ethernet/broadcom/tg3.c  4
-rw-r--r--  drivers/net/ethernet/marvell/mvneta.c  8
-rw-r--r--  drivers/net/ethernet/mediatek/mtk_eth_soc.c  11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/cmd.c  4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en.h  9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c  10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c  10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_main.c  63
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eq.c  20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c  6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  16
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/health.c  2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c  75
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/qp.c  4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/rl.c  22
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/vxlan.c  64
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/vxlan.h  1
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c  15
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.c  55
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.h  8
-rw-r--r--  drivers/net/ethernet/qualcomm/emac/emac.c  6
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/common.h  2
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c  5
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/enh_desc.c  3
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/norm_desc.c  2
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c  6
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  2
-rw-r--r--  drivers/net/phy/marvell.c  14
-rw-r--r--  drivers/net/phy/mdio-xgene.c  21
-rw-r--r--  drivers/net/vxlan.c  19
-rw-r--r--  drivers/net/wireless/mac80211_hwsim.c  3
-rw-r--r--  drivers/nvdimm/btt.c  201
-rw-r--r--  drivers/nvdimm/btt.h  47
-rw-r--r--  drivers/nvdimm/pfn_devs.c  20
-rw-r--r--  drivers/nvme/host/core.c  11
-rw-r--r--  drivers/nvme/host/fc.c  1
-rw-r--r--  drivers/parisc/lba_pci.c  33
-rw-r--r--  drivers/pci/pci-driver.c  7
-rw-r--r--  drivers/pinctrl/intel/pinctrl-cherryview.c  16
-rw-r--r--  drivers/s390/net/qeth_core_main.c  9
-rw-r--r--  drivers/scsi/aacraid/aacraid.h  1
-rw-r--r--  drivers/scsi/aacraid/linit.c  2
-rw-r--r--  drivers/scsi/osd/osd_initiator.c  4
-rw-r--r--  drivers/scsi/scsi_devinfo.c  6
-rw-r--r--  drivers/scsi/scsi_scan.c  13
-rw-r--r--  drivers/scsi/scsi_sysfs.c  5
-rw-r--r--  drivers/scsi/scsi_transport_spi.c  12
-rw-r--r--  drivers/spi/spi-armada-3700.c  8
-rw-r--r--  drivers/spi/spi-atmel.c  2
-rw-r--r--  drivers/spi/spi-rspi.c  4
-rw-r--r--  drivers/spi/spi-sun4i.c  2
-rw-r--r--  drivers/spi/spi-xilinx.c  11
-rw-r--r--  drivers/target/target_core_pscsi.c  4
-rw-r--r--  drivers/xen/balloon.c  65
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.c  4
-rw-r--r--  fs/xfs/libxfs/xfs_attr.c  20
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.c  9
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.h  3
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c  2
-rw-r--r--  fs/xfs/libxfs/xfs_defer.c  39
-rw-r--r--  fs/xfs/libxfs/xfs_defer.h  5
-rw-r--r--  fs/xfs/libxfs/xfs_iext_tree.c  4
-rw-r--r--  fs/xfs/libxfs/xfs_refcount.c  52
-rw-r--r--  fs/xfs/libxfs/xfs_rmap.c  99
-rw-r--r--  fs/xfs/libxfs/xfs_rmap.h  16
-rw-r--r--  fs/xfs/xfs_extfree_item.c  2
-rw-r--r--  fs/xfs/xfs_fsops.c  5
-rw-r--r--  fs/xfs/xfs_icache.c  35
-rw-r--r--  fs/xfs/xfs_icache.h  1
-rw-r--r--  fs/xfs/xfs_inode.c  28
-rw-r--r--  fs/xfs/xfs_inode.h  1
-rw-r--r--  fs/xfs/xfs_reflink.c  21
-rw-r--r--  fs/xfs/xfs_super.c  9
-rw-r--r--  include/asm-generic/mm_hooks.h  5
-rw-r--r--  include/asm-generic/pgtable.h  5
-rw-r--r--  include/crypto/mcryptd.h  1
-rw-r--r--  include/kvm/arm_arch_timer.h  2
-rw-r--r--  include/linux/bio.h  2
-rw-r--r--  include/linux/blk_types.h  9
-rw-r--r--  include/linux/blkdev.h  25
-rw-r--r--  include/linux/bpf_verifier.h  4
-rw-r--r--  include/linux/ipv6.h  3
-rw-r--r--  include/linux/mfd/rtsx_pci.h  2
-rw-r--r--  include/linux/mlx5/driver.h  3
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h  8
-rw-r--r--  include/linux/spi/spi.h  2
-rw-r--r--  include/net/cfg80211.h  1
-rw-r--r--  include/net/pkt_cls.h  5
-rw-r--r--  include/trace/events/clk.h  4
-rw-r--r--  include/trace/events/kvm.h  7
-rw-r--r--  include/xen/balloon.h  5
-rw-r--r--  init/main.c  6
-rw-r--r--  kernel/bpf/verifier.c  283
-rw-r--r--  kernel/fork.c  3
-rw-r--r--  lib/test_bpf.c  43
-rw-r--r--  mm/backing-dev.c  5
-rw-r--r--  net/bridge/br_netlink.c  11
-rw-r--r--  net/core/dev.c  2
-rw-r--r--  net/core/net_namespace.c  2
-rw-r--r--  net/core/skbuff.c  7
-rw-r--r--  net/ipv4/fib_frontend.c  9
-rw-r--r--  net/ipv4/fib_semantics.c  8
-rw-r--r--  net/ipv4/ip_gre.c  1
-rw-r--r--  net/ipv6/af_inet6.c  1
-rw-r--r--  net/ipv6/ip6_gre.c  1
-rw-r--r--  net/ipv6/ip6_output.c  12
-rw-r--r--  net/ipv6/ip6_tunnel.c  9
-rw-r--r--  net/ipv6/ipv6_sockglue.c  1
-rw-r--r--  net/ipv6/route.c  20
-rw-r--r--  net/openvswitch/flow.c  15
-rw-r--r--  net/sched/cls_bpf.c  93
-rw-r--r--  net/sctp/debug.c  3
-rw-r--r--  net/sctp/ulpqueue.c  24
-rw-r--r--  net/tipc/group.c  16
-rw-r--r--  net/wireless/Makefile  31
-rw-r--r--  net/wireless/certs/sforshee.hex  86
-rw-r--r--  net/wireless/certs/sforshee.x509  bin 680 -> 0 bytes
-rw-r--r--  net/wireless/nl80211.c  6
-rw-r--r--  tools/arch/s390/include/uapi/asm/bpf_perf_event.h  2
-rwxr-xr-x  tools/kvm/kvm_stat/kvm_stat  74
-rw-r--r--  tools/kvm/kvm_stat/kvm_stat.txt  4
-rw-r--r--  tools/testing/selftests/bpf/Makefile  2
-rw-r--r--  tools/testing/selftests/bpf/test_progs.c  8
-rw-r--r--  tools/testing/selftests/bpf/test_verifier.c  629
-rw-r--r--  tools/testing/selftests/net/config  1
-rw-r--r--  tools/testing/selftests/x86/ldt_gdt.c  9
-rw-r--r--  virt/kvm/arm/arch_timer.c  40
-rw-r--r--  virt/kvm/arm/arm.c  2
-rw-r--r--  virt/kvm/arm/mmio.c  6
-rw-r--r--  virt/kvm/arm/mmu.c  10
286 files changed, 4166 insertions(+), 1652 deletions(-)
diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
index 376fa2f50e6b..956bb046e599 100644
--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
+++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
@@ -13,7 +13,6 @@ Required properties:
13 at25df321a 13 at25df321a
14 at25df641 14 at25df641
15 at26df081a 15 at26df081a
16 en25s64
17 mr25h128 16 mr25h128
18 mr25h256 17 mr25h256
19 mr25h10 18 mr25h10
@@ -33,7 +32,6 @@ Required properties:
33 s25fl008k 32 s25fl008k
34 s25fl064k 33 s25fl064k
35 sst25vf040b 34 sst25vf040b
36 sst25wf040b
37 m25p40 35 m25p40
38 m25p80 36 m25p80
39 m25p16 37 m25p16
diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
index 5bf13960f7f4..e3c48b20b1a6 100644
--- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
+++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
@@ -12,24 +12,30 @@ Required properties:
12 - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc 12 - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
13- reg : Offset and length of the register set for the device 13- reg : Offset and length of the register set for the device
14- interrupts : Should contain CSPI/eCSPI interrupt 14- interrupts : Should contain CSPI/eCSPI interrupt
15- cs-gpios : Specifies the gpio pins to be used for chipselects.
16- clocks : Clock specifiers for both ipg and per clocks. 15- clocks : Clock specifiers for both ipg and per clocks.
17- clock-names : Clock names should include both "ipg" and "per" 16- clock-names : Clock names should include both "ipg" and "per"
18See the clock consumer binding, 17See the clock consumer binding,
19 Documentation/devicetree/bindings/clock/clock-bindings.txt 18 Documentation/devicetree/bindings/clock/clock-bindings.txt
20- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
21 Documentation/devicetree/bindings/dma/dma.txt
22- dma-names: DMA request names should include "tx" and "rx" if present.
23 19
24Obsolete properties: 20Recommended properties:
25- fsl,spi-num-chipselects : Contains the number of the chipselect 21- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt. While the native chip
22select lines can be used, they appear to always generate a pulse between each
23word of a transfer. Most use cases will require GPIO based chip selects to
24generate a valid transaction.
26 25
27Optional properties: 26Optional properties:
27- num-cs : Number of total chip selects, see spi-bus.txt.
28- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
29Documentation/devicetree/bindings/dma/dma.txt.
30- dma-names: DMA request names, if present, should include "tx" and "rx".
28- fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register 31- fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
29controlling the SPI_READY handling. Note that to enable the DRCTL consideration, 32controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
30the SPI_READY mode-flag needs to be set too. 33the SPI_READY mode-flag needs to be set too.
31Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst). 34Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).
32 35
36Obsolete properties:
37- fsl,spi-num-chipselects : Contains the number of the chipselect
38
33Example: 39Example:
34 40
35ecspi@70010000 { 41ecspi@70010000 {
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 3448e675b462..51101708a03a 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -1,6 +1,4 @@
1 1
2<previous description obsolete, deleted>
3
4Virtual memory map with 4 level page tables: 2Virtual memory map with 4 level page tables:
5 3
60000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm 40000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
@@ -14,13 +12,15 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
14... unused hole ... 12... unused hole ...
15ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) 13ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
16... unused hole ... 14... unused hole ...
15fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
17ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks 16ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
18... unused hole ... 17... unused hole ...
19ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space 18ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
20... unused hole ... 19... unused hole ...
21ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 20ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
22ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable) 21ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space (variable)
23ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls 22[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
23ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
24ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole 24ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
25 25
26Virtual memory map with 5 level page tables: 26Virtual memory map with 5 level page tables:
@@ -36,19 +36,22 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
36... unused hole ... 36... unused hole ...
37ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) 37ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
38... unused hole ... 38... unused hole ...
39fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
39ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks 40ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
40... unused hole ... 41... unused hole ...
41ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space 42ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
42... unused hole ... 43... unused hole ...
43ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 44ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
44ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space 45ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space
45ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls 46[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
47ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
46ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole 48ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
47 49
48Architecture defines a 64-bit virtual address. Implementations can support 50Architecture defines a 64-bit virtual address. Implementations can support
49less. Currently supported are 48- and 57-bit virtual addresses. Bits 63 51less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
50through to the most-significant implemented bit are set to either all ones 52through to the most-significant implemented bit are sign extended.
51or all zero. This causes hole between user space and kernel addresses. 53This causes hole between user space and kernel addresses if you interpret them
54as unsigned.
52 55
53The direct mapping covers all memory in the system up to the highest 56The direct mapping covers all memory in the system up to the highest
54memory address (this means in some cases it can also include PCI memory 57memory address (this means in some cases it can also include PCI memory
@@ -58,9 +61,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
58the processes using the page fault handler, with init_top_pgt as 61the processes using the page fault handler, with init_top_pgt as
59reference. 62reference.
60 63
61Current X86-64 implementations support up to 46 bits of address space (64 TB),
62which is our current limit. This expands into MBZ space in the page tables.
63
64We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual 64We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
65memory window (this size is arbitrary, it can be raised later if needed). 65memory window (this size is arbitrary, it can be raised later if needed).
66The mappings are not part of any other kernel PGD and are only available 66The mappings are not part of any other kernel PGD and are only available
@@ -72,5 +72,3 @@ following fixmap section.
72Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all 72Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
73physical memory, vmalloc/ioremap space and virtual memory map are randomized. 73physical memory, vmalloc/ioremap space and virtual memory map are randomized.
74Their order is preserved but their base will be offset early at boot time. 74Their order is preserved but their base will be offset early at boot time.
75
76-Andi Kleen, Jul 2004
diff --git a/Makefile b/Makefile
index 7e02f951b284..ac8c441866b7 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
2VERSION = 4 2VERSION = 4
3PATCHLEVEL = 15 3PATCHLEVEL = 15
4SUBLEVEL = 0 4SUBLEVEL = 0
5EXTRAVERSION = -rc4 5EXTRAVERSION = -rc5
6NAME = Fearless Coyote 6NAME = Fearless Coyote
7 7
8# *DOCUMENTATION* 8# *DOCUMENTATION*
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 1712f132b80d..b83fdc06286a 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -85,7 +85,11 @@
85 .pushsection .text.fixup,"ax" 85 .pushsection .text.fixup,"ax"
86 .align 4 86 .align 4
879001: mov r4, #-EFAULT 879001: mov r4, #-EFAULT
88#ifdef CONFIG_CPU_SW_DOMAIN_PAN
89 ldr r5, [sp, #9*4] @ *err_ptr
90#else
88 ldr r5, [sp, #8*4] @ *err_ptr 91 ldr r5, [sp, #8*4] @ *err_ptr
92#endif
89 str r4, [r5] 93 str r4, [r5]
90 ldmia sp, {r1, r2} @ retrieve dst, len 94 ldmia sp, {r1, r2} @ retrieve dst, len
91 add r2, r2, r1 95 add r2, r2, r1
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 321c9c05dd9e..f4363d40e2cd 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
74{ 74{
75 u64 reg; 75 u64 reg;
76 76
77 /* Clear pmscr in case of early return */
78 *pmscr_el1 = 0;
79
77 /* SPE present on this CPU? */ 80 /* SPE present on this CPU? */
78 if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), 81 if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
79 ID_AA64DFR0_PMSVER_SHIFT)) 82 ID_AA64DFR0_PMSVER_SHIFT))
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index 9345b44b86f0..f57118e1f6b4 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -123,8 +123,8 @@ int puts(const char *s)
123 while ((nuline = strchr(s, '\n')) != NULL) { 123 while ((nuline = strchr(s, '\n')) != NULL) {
124 if (nuline != s) 124 if (nuline != s)
125 pdc_iodc_print(s, nuline - s); 125 pdc_iodc_print(s, nuline - s);
126 pdc_iodc_print("\r\n", 2); 126 pdc_iodc_print("\r\n", 2);
127 s = nuline + 1; 127 s = nuline + 1;
128 } 128 }
129 if (*s != '\0') 129 if (*s != '\0')
130 pdc_iodc_print(s, strlen(s)); 130 pdc_iodc_print(s, strlen(s));
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index c980a02a52bc..598c8d60fa5e 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -35,7 +35,12 @@ struct thread_info {
35 35
36/* thread information allocation */ 36/* thread information allocation */
37 37
38#ifdef CONFIG_IRQSTACKS
39#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */
40#else
38#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */ 41#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */
42#endif
43
39/* Be sure to hunt all references to this down when you change the size of 44/* Be sure to hunt all references to this down when you change the size of
40 * the kernel stack */ 45 * the kernel stack */
41#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) 46#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index a4fd296c958e..f3cecf5117cf 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
878 STREG %r19,PT_SR7(%r16) 878 STREG %r19,PT_SR7(%r16)
879 879
880intr_return: 880intr_return:
881 /* NOTE: Need to enable interrupts incase we schedule. */
882 ssm PSW_SM_I, %r0
883
884 /* check for reschedule */ 881 /* check for reschedule */
885 mfctl %cr30,%r1 882 mfctl %cr30,%r1
886 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */ 883 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +904,11 @@ intr_check_sig:
907 LDREG PT_IASQ1(%r16), %r20 904 LDREG PT_IASQ1(%r16), %r20
908 cmpib,COND(=),n 0,%r20,intr_restore /* backward */ 905 cmpib,COND(=),n 0,%r20,intr_restore /* backward */
909 906
907 /* NOTE: We need to enable interrupts if we have to deliver
908 * signals. We used to do this earlier but it caused kernel
909 * stack overflows. */
910 ssm PSW_SM_I, %r0
911
910 copy %r0, %r25 /* long in_syscall = 0 */ 912 copy %r0, %r25 /* long in_syscall = 0 */
911#ifdef CONFIG_64BIT 913#ifdef CONFIG_64BIT
912 ldo -16(%r30),%r29 /* Reference param save area */ 914 ldo -16(%r30),%r29 /* Reference param save area */
@@ -958,6 +960,10 @@ intr_do_resched:
958 cmpib,COND(=) 0, %r20, intr_do_preempt 960 cmpib,COND(=) 0, %r20, intr_do_preempt
959 nop 961 nop
960 962
963 /* NOTE: We need to enable interrupts if we schedule. We used
964 * to do this earlier but it caused kernel stack overflows. */
965 ssm PSW_SM_I, %r0
966
961#ifdef CONFIG_64BIT 967#ifdef CONFIG_64BIT
962 ldo -16(%r30),%r29 /* Reference param save area */ 968 ldo -16(%r30),%r29 /* Reference param save area */
963#endif 969#endif
diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S
index e3a8e5e4d5de..8d072c44f300 100644
--- a/arch/parisc/kernel/hpmc.S
+++ b/arch/parisc/kernel/hpmc.S
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
305 305
306 306
307 __INITRODATA 307 __INITRODATA
308 .align 4
308 .export os_hpmc_size 309 .export os_hpmc_size
309os_hpmc_size: 310os_hpmc_size:
310 .word .os_hpmc_end-.os_hpmc 311 .word .os_hpmc_end-.os_hpmc
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 5a657986ebbf..143f90e2f9f3 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -15,7 +15,6 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/kallsyms.h> 16#include <linux/kallsyms.h>
17#include <linux/sort.h> 17#include <linux/sort.h>
18#include <linux/sched.h>
19 18
20#include <linux/uaccess.h> 19#include <linux/uaccess.h>
21#include <asm/assembly.h> 20#include <asm/assembly.h>
diff --git a/arch/parisc/lib/delay.c b/arch/parisc/lib/delay.c
index 7eab4bb8abe6..66e506520505 100644
--- a/arch/parisc/lib/delay.c
+++ b/arch/parisc/lib/delay.c
@@ -16,9 +16,7 @@
16#include <linux/preempt.h> 16#include <linux/preempt.h>
17#include <linux/init.h> 17#include <linux/init.h>
18 18
19#include <asm/processor.h>
20#include <asm/delay.h> 19#include <asm/delay.h>
21
22#include <asm/special_insns.h> /* for mfctl() */ 20#include <asm/special_insns.h> /* for mfctl() */
23#include <asm/processor.h> /* for boot_cpu_data */ 21#include <asm/processor.h> /* for boot_cpu_data */
24 22
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 6177d43f0ce8..e2a2b8400490 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
160#endif 160#endif
161} 161}
162 162
163static inline void arch_dup_mmap(struct mm_struct *oldmm, 163static inline int arch_dup_mmap(struct mm_struct *oldmm,
164 struct mm_struct *mm) 164 struct mm_struct *mm)
165{ 165{
166 return 0;
166} 167}
167 168
168#ifndef CONFIG_PPC_BOOK3S_64 169#ifndef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 5acb5a176dbe..72be0c32e902 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)
1403 1403
1404 printk("NIP: "REG" LR: "REG" CTR: "REG"\n", 1404 printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
1405 regs->nip, regs->link, regs->ctr); 1405 regs->nip, regs->link, regs->ctr);
1406 printk("REGS: %p TRAP: %04lx %s (%s)\n", 1406 printk("REGS: %px TRAP: %04lx %s (%s)\n",
1407 regs, regs->trap, print_tainted(), init_utsname()->release); 1407 regs, regs->trap, print_tainted(), init_utsname()->release);
1408 printk("MSR: "REG" ", regs->msr); 1408 printk("MSR: "REG" ", regs->msr);
1409 print_msr_bits(regs->msr); 1409 print_msr_bits(regs->msr);
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index bf457843e032..0d750d274c4e 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
725 725
726 /* Return the per-cpu state for state saving/migration */ 726 /* Return the per-cpu state for state saving/migration */
727 return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT | 727 return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
728 (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT; 728 (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
729 (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
729} 730}
730 731
731int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) 732int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
1558 1559
1559 /* 1560 /*
1560 * Restore P and Q. If the interrupt was pending, we 1561 * Restore P and Q. If the interrupt was pending, we
1561 * force both P and Q, which will trigger a resend. 1562 * force Q and !P, which will trigger a resend.
1562 * 1563 *
1563 * That means that a guest that had both an interrupt 1564 * That means that a guest that had both an interrupt
1564 * pending (queued) and Q set will restore with only 1565 * pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
1566 * is perfectly fine as coalescing interrupts that haven't 1567 * is perfectly fine as coalescing interrupts that haven't
1567 * been presented yet is always allowed. 1568 * been presented yet is always allowed.
1568 */ 1569 */
1569 if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) 1570 if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
1570 state->old_p = true; 1571 state->old_p = true;
1571 if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING) 1572 if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
1572 state->old_q = true; 1573 state->old_q = true;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 46d74e81aff1..d183b4801bdb 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -763,7 +763,8 @@ emit_clear:
763 func = (u8 *) __bpf_call_base + imm; 763 func = (u8 *) __bpf_call_base + imm;
764 764
765 /* Save skb pointer if we need to re-cache skb data */ 765 /* Save skb pointer if we need to re-cache skb data */
766 if (bpf_helper_changes_pkt_data(func)) 766 if ((ctx->seen & SEEN_SKB) &&
767 bpf_helper_changes_pkt_data(func))
767 PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); 768 PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
768 769
769 bpf_jit_emit_func_call(image, ctx, (u64)func); 770 bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ emit_clear:
772 PPC_MR(b2p[BPF_REG_0], 3); 773 PPC_MR(b2p[BPF_REG_0], 3);
773 774
774 /* refresh skb cache */ 775 /* refresh skb cache */
775 if (bpf_helper_changes_pkt_data(func)) { 776 if ((ctx->seen & SEEN_SKB) &&
777 bpf_helper_changes_pkt_data(func)) {
776 /* reload skb pointer to r3 */ 778 /* reload skb pointer to r3 */
777 PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); 779 PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
778 bpf_jit_emit_skb_loads(image, ctx); 780 bpf_jit_emit_skb_loads(image, ctx);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 153812966365..fce545774d50 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
410 int ret; 410 int ret;
411 __u64 target; 411 __u64 target;
412 412
413 if (is_kernel_addr(addr)) 413 if (is_kernel_addr(addr)) {
414 return branch_target((unsigned int *)addr); 414 if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
415 return 0;
416
417 return branch_target(&instr);
418 }
415 419
416 /* Userspace: need copy instruction here then translate it */ 420 /* Userspace: need copy instruction here then translate it */
417 pagefault_disable(); 421 pagefault_disable();
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 0ead3cd73caa..be4e7f84f70a 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -310,6 +310,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
310 return 0; 310 return 0;
311 311
312 /* 312 /*
313 * Check whether nest_imc is registered. We could end up here if the
314 * cpuhotplug callback registration fails. i.e, callback invokes the
315 * offline path for all successfully registered nodes. At this stage,
316 * nest_imc pmu will not be registered and we should return here.
317 *
318 * We return with a zero since this is not an offline failure. And
319 * cpuhp_setup_state() returns the actual failure reason to the caller,
320 * which in turn will call the cleanup routine.
321 */
322 if (!nest_pmus)
323 return 0;
324
325 /*
313 * Now that this cpu is one of the designated, 326 * Now that this cpu is one of the designated,
314 * find a next cpu a) which is online and b) in same chip. 327 * find a next cpu a) which is online and b) in same chip.
315 */ 328 */
@@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
1171 if (nest_pmus == 1) { 1184 if (nest_pmus == 1) {
1172 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); 1185 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
1173 kfree(nest_imc_refc); 1186 kfree(nest_imc_refc);
1187 kfree(per_nest_pmu_arr);
1174 } 1188 }
1175 1189
1176 if (nest_pmus > 0) 1190 if (nest_pmus > 0)
@@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
1195 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); 1209 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
1196 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); 1210 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
1197 kfree(pmu_ptr); 1211 kfree(pmu_ptr);
1198 kfree(per_nest_pmu_arr);
1199 return; 1212 return;
1200} 1213}
1201 1214
@@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
1309 ret = nest_pmu_cpumask_init(); 1322 ret = nest_pmu_cpumask_init();
1310 if (ret) { 1323 if (ret) {
1311 mutex_unlock(&nest_init_lock); 1324 mutex_unlock(&nest_init_lock);
1325 kfree(nest_imc_refc);
1326 kfree(per_nest_pmu_arr);
1312 goto err_free; 1327 goto err_free;
1313 } 1328 }
1314 } 1329 }
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e81c16838b90..9557d8b516df 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -55,8 +55,7 @@ struct bpf_jit {
55#define SEEN_LITERAL 8 /* code uses literals */ 55#define SEEN_LITERAL 8 /* code uses literals */
56#define SEEN_FUNC 16 /* calls C functions */ 56#define SEEN_FUNC 16 /* calls C functions */
57#define SEEN_TAIL_CALL 32 /* code uses tail calls */ 57#define SEEN_TAIL_CALL 32 /* code uses tail calls */
58#define SEEN_SKB_CHANGE 64 /* code changes skb data */ 58#define SEEN_REG_AX 64 /* code uses constant blinding */
59#define SEEN_REG_AX 128 /* code uses constant blinding */
60#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) 59#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
61 60
62/* 61/*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
448 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, 447 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
449 REG_15, 152); 448 REG_15, 152);
450 } 449 }
451 if (jit->seen & SEEN_SKB) 450 if (jit->seen & SEEN_SKB) {
452 emit_load_skb_data_hlen(jit); 451 emit_load_skb_data_hlen(jit);
453 if (jit->seen & SEEN_SKB_CHANGE)
454 /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ 452 /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
455 EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, 453 EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
456 STK_OFF_SKBP); 454 STK_OFF_SKBP);
455 }
457} 456}
458 457
459/* 458/*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
983 EMIT2(0x0d00, REG_14, REG_W1); 982 EMIT2(0x0d00, REG_14, REG_W1);
984 /* lgr %b0,%r2: load return value into %b0 */ 983 /* lgr %b0,%r2: load return value into %b0 */
985 EMIT4(0xb9040000, BPF_REG_0, REG_2); 984 EMIT4(0xb9040000, BPF_REG_0, REG_2);
986 if (bpf_helper_changes_pkt_data((void *)func)) { 985 if ((jit->seen & SEEN_SKB) &&
987 jit->seen |= SEEN_SKB_CHANGE; 986 bpf_helper_changes_pkt_data((void *)func)) {
988 /* lg %b1,ST_OFF_SKBP(%r15) */ 987 /* lg %b1,ST_OFF_SKBP(%r15) */
989 EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0, 988 EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
990 REG_15, STK_OFF_SKBP); 989 REG_15, STK_OFF_SKBP);
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index be3136f142a9..a8103a84b4ac 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
113 if (!printk_ratelimit()) 113 if (!printk_ratelimit())
114 return; 114 return;
115 115
116 printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", 116 printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
117 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 117 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
118 tsk->comm, task_pid_nr(tsk), address, 118 tsk->comm, task_pid_nr(tsk), address,
119 (void *)regs->pc, (void *)regs->u_regs[UREG_I7], 119 (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 815c03d7a765..41363f46797b 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
154 if (!printk_ratelimit()) 154 if (!printk_ratelimit())
155 return; 155 return;
156 156
157 printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", 157 printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
158 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 158 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
159 tsk->comm, task_pid_nr(tsk), address, 159 tsk->comm, task_pid_nr(tsk), address,
160 (void *)regs->tpc, (void *)regs->u_regs[UREG_I7], 160 (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 5765e7e711f7..ff5f9cb3039a 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1245 u8 *func = ((u8 *)__bpf_call_base) + imm; 1245 u8 *func = ((u8 *)__bpf_call_base) + imm;
1246 1246
1247 ctx->saw_call = true; 1247 ctx->saw_call = true;
1248 if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
1249 emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
1248 1250
1249 emit_call((u32 *)func, ctx); 1251 emit_call((u32 *)func, ctx);
1250 emit_nop(ctx); 1252 emit_nop(ctx);
1251 1253
1252 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); 1254 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
1253 1255
1254 if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind) 1256 if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
1255 load_skb_regs(ctx, bpf2sparc[BPF_REG_6]); 1257 load_skb_regs(ctx, L7);
1256 break; 1258 break;
1257 } 1259 }
1258 1260
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index b668e351fd6c..fca34b2177e2 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
15/* 15/*
16 * Needed since we do not use the asm-generic/mm_hooks.h: 16 * Needed since we do not use the asm-generic/mm_hooks.h:
17 */ 17 */
18static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) 18static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
19{ 19{
20 uml_setup_stubs(mm); 20 uml_setup_stubs(mm);
21 return 0;
21} 22}
22extern void arch_exit_mmap(struct mm_struct *mm); 23extern void arch_exit_mmap(struct mm_struct *mm);
23static inline void arch_unmap(struct mm_struct *mm, 24static inline void arch_unmap(struct mm_struct *mm,
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 4e6fcb32620f..428644175956 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
150 if (!printk_ratelimit()) 150 if (!printk_ratelimit())
151 return; 151 return;
152 152
153 printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x", 153 printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
154 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 154 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
155 tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi), 155 tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
156 (void *)UPT_IP(regs), (void *)UPT_SP(regs), 156 (void *)UPT_IP(regs), (void *)UPT_SP(regs),
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index 59b06b48f27d..5c205a9cb5a6 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -81,9 +81,10 @@ do { \
81 } \ 81 } \
82} while (0) 82} while (0)
83 83
84static inline void arch_dup_mmap(struct mm_struct *oldmm, 84static inline int arch_dup_mmap(struct mm_struct *oldmm,
85 struct mm_struct *mm) 85 struct mm_struct *mm)
86{ 86{
87 return 0;
87} 88}
88 89
89static inline void arch_unmap(struct mm_struct *mm, 90static inline void arch_unmap(struct mm_struct *mm,
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8eed3f94bfc7..d4fc98c50378 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -926,7 +926,8 @@ config MAXSMP
926config NR_CPUS 926config NR_CPUS
927 int "Maximum number of CPUs" if SMP && !MAXSMP 927 int "Maximum number of CPUs" if SMP && !MAXSMP
928 range 2 8 if SMP && X86_32 && !X86_BIGSMP 928 range 2 8 if SMP && X86_32 && !X86_BIGSMP
929 range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK 929 range 2 64 if SMP && X86_32 && X86_BIGSMP
930 range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
930 range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64 931 range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
931 default "1" if !SMP 932 default "1" if !SMP
932 default "8192" if MAXSMP 933 default "8192" if MAXSMP
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4838037f97f6..ace8f321a5a1 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -941,9 +941,10 @@ ENTRY(debug)
941 movl %esp, %eax # pt_regs pointer 941 movl %esp, %eax # pt_regs pointer
942 942
943 /* Are we currently on the SYSENTER stack? */ 943 /* Are we currently on the SYSENTER stack? */
944 PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) 944 movl PER_CPU_VAR(cpu_entry_area), %ecx
945 subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ 945 addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
946 cmpl $SIZEOF_SYSENTER_stack, %ecx 946 subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
947 cmpl $SIZEOF_entry_stack, %ecx
947 jb .Ldebug_from_sysenter_stack 948 jb .Ldebug_from_sysenter_stack
948 949
949 TRACE_IRQS_OFF 950 TRACE_IRQS_OFF
@@ -984,9 +985,10 @@ ENTRY(nmi)
984 movl %esp, %eax # pt_regs pointer 985 movl %esp, %eax # pt_regs pointer
985 986
986 /* Are we currently on the SYSENTER stack? */ 987 /* Are we currently on the SYSENTER stack? */
987 PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) 988 movl PER_CPU_VAR(cpu_entry_area), %ecx
988 subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ 989 addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
989 cmpl $SIZEOF_SYSENTER_stack, %ecx 990 subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
991 cmpl $SIZEOF_entry_stack, %ecx
990 jb .Lnmi_from_sysenter_stack 992 jb .Lnmi_from_sysenter_stack
991 993
992 /* Not on SYSENTER stack. */ 994 /* Not on SYSENTER stack. */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f81d50d7ceac..3d19c830e1b1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -140,6 +140,64 @@ END(native_usergs_sysret64)
140 * with them due to bugs in both AMD and Intel CPUs. 140 * with them due to bugs in both AMD and Intel CPUs.
141 */ 141 */
142 142
143 .pushsection .entry_trampoline, "ax"
144
145/*
146 * The code in here gets remapped into cpu_entry_area's trampoline. This means
147 * that the assembler and linker have the wrong idea as to where this code
148 * lives (and, in fact, it's mapped more than once, so it's not even at a
149 * fixed address). So we can't reference any symbols outside the entry
150 * trampoline and expect it to work.
151 *
152 * Instead, we carefully abuse %rip-relative addressing.
153 * _entry_trampoline(%rip) refers to the start of the remapped) entry
154 * trampoline. We can thus find cpu_entry_area with this macro:
155 */
156
157#define CPU_ENTRY_AREA \
158 _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
159
160/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
161#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \
162 SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
163
164ENTRY(entry_SYSCALL_64_trampoline)
165 UNWIND_HINT_EMPTY
166 swapgs
167
168 /* Stash the user RSP. */
169 movq %rsp, RSP_SCRATCH
170
171 /* Load the top of the task stack into RSP */
172 movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
173
174 /* Start building the simulated IRET frame. */
175 pushq $__USER_DS /* pt_regs->ss */
176 pushq RSP_SCRATCH /* pt_regs->sp */
177 pushq %r11 /* pt_regs->flags */
178 pushq $__USER_CS /* pt_regs->cs */
179 pushq %rcx /* pt_regs->ip */
180
181 /*
182 * x86 lacks a near absolute jump, and we can't jump to the real
183 * entry text with a relative jump. We could push the target
184 * address and then use retq, but this destroys the pipeline on
185 * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
186 * spill RDI and restore it in a second-stage trampoline.
187 */
188 pushq %rdi
189 movq $entry_SYSCALL_64_stage2, %rdi
190 jmp *%rdi
191END(entry_SYSCALL_64_trampoline)
192
193 .popsection
194
195ENTRY(entry_SYSCALL_64_stage2)
196 UNWIND_HINT_EMPTY
197 popq %rdi
198 jmp entry_SYSCALL_64_after_hwframe
199END(entry_SYSCALL_64_stage2)
200
143ENTRY(entry_SYSCALL_64) 201ENTRY(entry_SYSCALL_64)
144 UNWIND_HINT_EMPTY 202 UNWIND_HINT_EMPTY
145 /* 203 /*
@@ -330,8 +388,24 @@ syscall_return_via_sysret:
330 popq %rsi /* skip rcx */ 388 popq %rsi /* skip rcx */
331 popq %rdx 389 popq %rdx
332 popq %rsi 390 popq %rsi
391
392 /*
393 * Now all regs are restored except RSP and RDI.
394 * Save old stack pointer and switch to trampoline stack.
395 */
396 movq %rsp, %rdi
397 movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
398
399 pushq RSP-RDI(%rdi) /* RSP */
400 pushq (%rdi) /* RDI */
401
402 /*
403 * We are on the trampoline stack. All regs except RDI are live.
404 * We can do future final exit work right here.
405 */
406
333 popq %rdi 407 popq %rdi
334 movq RSP-ORIG_RAX(%rsp), %rsp 408 popq %rsp
335 USERGS_SYSRET64 409 USERGS_SYSRET64
336END(entry_SYSCALL_64) 410END(entry_SYSCALL_64)
337 411
@@ -466,12 +540,13 @@ END(irq_entries_start)
466 540
467.macro DEBUG_ENTRY_ASSERT_IRQS_OFF 541.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
468#ifdef CONFIG_DEBUG_ENTRY 542#ifdef CONFIG_DEBUG_ENTRY
469 pushfq 543 pushq %rax
470 testl $X86_EFLAGS_IF, (%rsp) 544 SAVE_FLAGS(CLBR_RAX)
545 testl $X86_EFLAGS_IF, %eax
471 jz .Lokay_\@ 546 jz .Lokay_\@
472 ud2 547 ud2
473.Lokay_\@: 548.Lokay_\@:
474 addq $8, %rsp 549 popq %rax
475#endif 550#endif
476.endm 551.endm
477 552
@@ -563,6 +638,13 @@ END(irq_entries_start)
563/* 0(%rsp): ~(interrupt number) */ 638/* 0(%rsp): ~(interrupt number) */
564 .macro interrupt func 639 .macro interrupt func
565 cld 640 cld
641
642 testb $3, CS-ORIG_RAX(%rsp)
643 jz 1f
644 SWAPGS
645 call switch_to_thread_stack
6461:
647
566 ALLOC_PT_GPREGS_ON_STACK 648 ALLOC_PT_GPREGS_ON_STACK
567 SAVE_C_REGS 649 SAVE_C_REGS
568 SAVE_EXTRA_REGS 650 SAVE_EXTRA_REGS
@@ -572,12 +654,8 @@ END(irq_entries_start)
572 jz 1f 654 jz 1f
573 655
574 /* 656 /*
575 * IRQ from user mode. Switch to kernel gsbase and inform context 657 * IRQ from user mode.
576 * tracking that we're in kernel mode. 658 *
577 */
578 SWAPGS
579
580 /*
581 * We need to tell lockdep that IRQs are off. We can't do this until 659 * We need to tell lockdep that IRQs are off. We can't do this until
582 * we fix gsbase, and we should do it before enter_from_user_mode 660 * we fix gsbase, and we should do it before enter_from_user_mode
583 * (which can take locks). Since TRACE_IRQS_OFF idempotent, 661 * (which can take locks). Since TRACE_IRQS_OFF idempotent,
@@ -630,10 +708,41 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
630 ud2 708 ud2
6311: 7091:
632#endif 710#endif
633 SWAPGS
634 POP_EXTRA_REGS 711 POP_EXTRA_REGS
635 POP_C_REGS 712 popq %r11
636 addq $8, %rsp /* skip regs->orig_ax */ 713 popq %r10
714 popq %r9
715 popq %r8
716 popq %rax
717 popq %rcx
718 popq %rdx
719 popq %rsi
720
721 /*
722 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
723 * Save old stack pointer and switch to trampoline stack.
724 */
725 movq %rsp, %rdi
726 movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
727
728 /* Copy the IRET frame to the trampoline stack. */
729 pushq 6*8(%rdi) /* SS */
730 pushq 5*8(%rdi) /* RSP */
731 pushq 4*8(%rdi) /* EFLAGS */
732 pushq 3*8(%rdi) /* CS */
733 pushq 2*8(%rdi) /* RIP */
734
735 /* Push user RDI on the trampoline stack. */
736 pushq (%rdi)
737
738 /*
739 * We are on the trampoline stack. All regs except RDI are live.
740 * We can do future final exit work right here.
741 */
742
743 /* Restore RDI. */
744 popq %rdi
745 SWAPGS
637 INTERRUPT_RETURN 746 INTERRUPT_RETURN
638 747
639 748
@@ -829,7 +938,33 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
829/* 938/*
830 * Exception entry points. 939 * Exception entry points.
831 */ 940 */
832#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) 941#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
942
943/*
944 * Switch to the thread stack. This is called with the IRET frame and
945 * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
946 * space has not been allocated for them.)
947 */
948ENTRY(switch_to_thread_stack)
949 UNWIND_HINT_FUNC
950
951 pushq %rdi
952 movq %rsp, %rdi
953 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
954 UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
955
956 pushq 7*8(%rdi) /* regs->ss */
957 pushq 6*8(%rdi) /* regs->rsp */
958 pushq 5*8(%rdi) /* regs->eflags */
959 pushq 4*8(%rdi) /* regs->cs */
960 pushq 3*8(%rdi) /* regs->ip */
961 pushq 2*8(%rdi) /* regs->orig_ax */
962 pushq 8(%rdi) /* return address */
963 UNWIND_HINT_FUNC
964
965 movq (%rdi), %rdi
966 ret
967END(switch_to_thread_stack)
833 968
834.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 969.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
835ENTRY(\sym) 970ENTRY(\sym)
@@ -848,11 +983,12 @@ ENTRY(\sym)
848 983
849 ALLOC_PT_GPREGS_ON_STACK 984 ALLOC_PT_GPREGS_ON_STACK
850 985
851 .if \paranoid 986 .if \paranoid < 2
852 .if \paranoid == 1
853 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ 987 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
854 jnz 1f 988 jnz .Lfrom_usermode_switch_stack_\@
855 .endif 989 .endif
990
991 .if \paranoid
856 call paranoid_entry 992 call paranoid_entry
857 .else 993 .else
858 call error_entry 994 call error_entry
@@ -894,20 +1030,15 @@ ENTRY(\sym)
894 jmp error_exit 1030 jmp error_exit
895 .endif 1031 .endif
896 1032
897 .if \paranoid == 1 1033 .if \paranoid < 2
898 /* 1034 /*
899 * Paranoid entry from userspace. Switch stacks and treat it 1035 * Entry from userspace. Switch stacks and treat it
900 * as a normal entry. This means that paranoid handlers 1036 * as a normal entry. This means that paranoid handlers
901 * run in real process context if user_mode(regs). 1037 * run in real process context if user_mode(regs).
902 */ 1038 */
9031: 1039.Lfrom_usermode_switch_stack_\@:
904 call error_entry 1040 call error_entry
905 1041
906
907 movq %rsp, %rdi /* pt_regs pointer */
908 call sync_regs
909 movq %rax, %rsp /* switch stack */
910
911 movq %rsp, %rdi /* pt_regs pointer */ 1042 movq %rsp, %rdi /* pt_regs pointer */
912 1043
913 .if \has_error_code 1044 .if \has_error_code
@@ -1170,6 +1301,14 @@ ENTRY(error_entry)
1170 SWAPGS 1301 SWAPGS
1171 1302
1172.Lerror_entry_from_usermode_after_swapgs: 1303.Lerror_entry_from_usermode_after_swapgs:
1304 /* Put us onto the real thread stack. */
1305 popq %r12 /* save return addr in %12 */
1306 movq %rsp, %rdi /* arg0 = pt_regs pointer */
1307 call sync_regs
1308 movq %rax, %rsp /* switch stack */
1309 ENCODE_FRAME_POINTER
1310 pushq %r12
1311
1173 /* 1312 /*
1174 * We need to tell lockdep that IRQs are off. We can't do this until 1313 * We need to tell lockdep that IRQs are off. We can't do this until
1175 * we fix gsbase, and we should do it before enter_from_user_mode 1314 * we fix gsbase, and we should do it before enter_from_user_mode
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 568e130d932c..95ad40eb7eff 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -48,7 +48,7 @@
48 */ 48 */
49ENTRY(entry_SYSENTER_compat) 49ENTRY(entry_SYSENTER_compat)
50 /* Interrupts are off on entry. */ 50 /* Interrupts are off on entry. */
51 SWAPGS_UNSAFE_STACK 51 SWAPGS
52 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 52 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
53 53
54 /* 54 /*
@@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
306 */ 306 */
307 movl %eax, %eax 307 movl %eax, %eax
308 308
309 /* Construct struct pt_regs on stack (iret frame is already on stack) */
310 pushq %rax /* pt_regs->orig_ax */ 309 pushq %rax /* pt_regs->orig_ax */
310
311 /* switch to thread stack expects orig_ax to be pushed */
312 call switch_to_thread_stack
313
311 pushq %rdi /* pt_regs->di */ 314 pushq %rdi /* pt_regs->di */
312 pushq %rsi /* pt_regs->si */ 315 pushq %rsi /* pt_regs->si */
313 pushq %rdx /* pt_regs->dx */ 316 pushq %rdx /* pt_regs->dx */
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index f279ba2643dc..1faf40f2dda9 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -37,6 +37,7 @@
37#include <asm/unistd.h> 37#include <asm/unistd.h>
38#include <asm/fixmap.h> 38#include <asm/fixmap.h>
39#include <asm/traps.h> 39#include <asm/traps.h>
40#include <asm/paravirt.h>
40 41
41#define CREATE_TRACE_POINTS 42#define CREATE_TRACE_POINTS
42#include "vsyscall_trace.h" 43#include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
138 139
139 WARN_ON_ONCE(address != regs->ip); 140 WARN_ON_ONCE(address != regs->ip);
140 141
142 /* This should be unreachable in NATIVE mode. */
143 if (WARN_ON(vsyscall_mode == NATIVE))
144 return false;
145
141 if (vsyscall_mode == NONE) { 146 if (vsyscall_mode == NONE) {
142 warn_bad_vsyscall(KERN_INFO, regs, 147 warn_bad_vsyscall(KERN_INFO, regs,
143 "vsyscall attempted with vsyscall=none"); 148 "vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
329 return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR; 334 return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
330} 335}
331 336
337/*
338 * The VSYSCALL page is the only user-accessible page in the kernel address
339 * range. Normally, the kernel page tables can have _PAGE_USER clear, but
340 * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
341 * are enabled.
342 *
343 * Some day we may create a "minimal" vsyscall mode in which we emulate
344 * vsyscalls but leave the page not present. If so, we skip calling
345 * this.
346 */
347static void __init set_vsyscall_pgtable_user_bits(void)
348{
349 pgd_t *pgd;
350 p4d_t *p4d;
351 pud_t *pud;
352 pmd_t *pmd;
353
354 pgd = pgd_offset_k(VSYSCALL_ADDR);
355 set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
356 p4d = p4d_offset(pgd, VSYSCALL_ADDR);
357#if CONFIG_PGTABLE_LEVELS >= 5
358 p4d->p4d |= _PAGE_USER;
359#endif
360 pud = pud_offset(p4d, VSYSCALL_ADDR);
361 set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
362 pmd = pmd_offset(pud, VSYSCALL_ADDR);
363 set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
364}
365
332void __init map_vsyscall(void) 366void __init map_vsyscall(void)
333{ 367{
334 extern char __vsyscall_page; 368 extern char __vsyscall_page;
335 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); 369 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
336 370
337 if (vsyscall_mode != NONE) 371 if (vsyscall_mode != NONE) {
338 __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, 372 __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
339 vsyscall_mode == NATIVE 373 vsyscall_mode == NATIVE
340 ? PAGE_KERNEL_VSYSCALL 374 ? PAGE_KERNEL_VSYSCALL
341 : PAGE_KERNEL_VVAR); 375 : PAGE_KERNEL_VVAR);
376 set_vsyscall_pgtable_user_bits();
377 }
342 378
343 BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != 379 BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
344 (unsigned long)VSYSCALL_ADDR); 380 (unsigned long)VSYSCALL_ADDR);
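
The comment above relies on the hardware rule that a virtual address is only reachable from user mode if the U/S bit (_PAGE_USER, bit 2) is set in the entry at every level of the page-table walk. A small stand-alone sketch of that rule, with invented entry values (only bit 2 matters here); it is an illustration, not kernel code:

    #include <stdio.h>
    #include <stdint.h>

    #define _PAGE_USER 0x4ULL    /* U/S bit, bit 2 of a page-table entry */

    static int user_accessible(const uint64_t *walk, int levels)
    {
        /* Accessible from CPL 3 only if U/S is set at every level. */
        for (int i = 0; i < levels; i++)
            if (!(walk[i] & _PAGE_USER))
                return 0;
        return 1;
    }

    int main(void)
    {
        /* pgd/p4d/pud/pmd entries covering a hypothetical vsyscall page;
         * the values are invented, only bit 2 matters here. */
        uint64_t walk[4] = { 0x1063, 0x2063, 0x3063, 0x4063 };

        printf("before: user accessible = %d\n", user_accessible(walk, 4));

        /* what set_vsyscall_pgtable_user_bits() does at each level */
        for (int i = 0; i < 4; i++)
            walk[i] |= _PAGE_USER;

        printf("after:  user accessible = %d\n", user_accessible(walk, 4));
        return 0;
    }
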
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644
index 000000000000..2fbc69a0916e
--- /dev/null
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -0,0 +1,68 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#ifndef _ASM_X86_CPU_ENTRY_AREA_H
4#define _ASM_X86_CPU_ENTRY_AREA_H
5
6#include <linux/percpu-defs.h>
7#include <asm/processor.h>
8
9/*
10 * cpu_entry_area is a percpu region that contains things needed by the CPU
11 * and early entry/exit code. Real types aren't used for all fields here
12 * to avoid circular header dependencies.
13 *
14 * Every field is a virtual alias of some other allocated backing store.
15 * There is no direct allocation of a struct cpu_entry_area.
16 */
17struct cpu_entry_area {
18 char gdt[PAGE_SIZE];
19
20 /*
21 * The GDT is just below entry_stack and thus serves (on x86_64) as
 22 * a read-only guard page.
23 */
24 struct entry_stack_page entry_stack_page;
25
26 /*
27 * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
28 * we need task switches to work, and task switches write to the TSS.
29 */
30 struct tss_struct tss;
31
32 char entry_trampoline[PAGE_SIZE];
33
34#ifdef CONFIG_X86_64
35 /*
36 * Exception stacks used for IST entries.
37 *
38 * In the future, this should have a separate slot for each stack
39 * with guard pages between them.
40 */
41 char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
42#endif
43};
44
45#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
46#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
47
48DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
49
50extern void setup_cpu_entry_areas(void);
51extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
52
53#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
54#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
55
56#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
57
58#define CPU_ENTRY_AREA_MAP_SIZE \
59 (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
60
61extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
62
63static inline struct entry_stack *cpu_entry_stack(int cpu)
64{
65 return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
66}
67
68#endif
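
To get a feel for the per-CPU footprint implied by struct cpu_entry_area, here is a stand-alone sketch that mirrors the size arithmetic. The member sizes are stand-ins (the real entry_stack_page is one page, the real tss_struct and exception-stack sizes come from the kernel configuration), so the numbers are illustrative only:

    #include <stdio.h>
    #include <stddef.h>

    #define PAGE_SIZE           4096
    #define NR_CPUS             64                  /* assumed config value */
    #define N_EXCEPTION_STACKS  4
    #define EXCEPTION_STKSZ     (1 * PAGE_SIZE)
    #define DEBUG_STKSZ         (2 * PAGE_SIZE)

    struct mock_cpu_entry_area {
        char gdt[PAGE_SIZE];
        char entry_stack_page[PAGE_SIZE];
        char tss[3 * PAGE_SIZE];            /* the io_bitmap makes the real TSS roughly 3 pages */
        char entry_trampoline[PAGE_SIZE];
        char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
    };

    int main(void)
    {
        size_t per_cpu = sizeof(struct mock_cpu_entry_area);

        printf("per-CPU entry area: %zu bytes (%zu pages)\n",
               per_cpu, per_cpu / PAGE_SIZE);
        printf("total for %d CPUs:  %zu bytes\n", NR_CPUS, per_cpu * NR_CPUS);
        return 0;
    }

The sketch only tracks sizes; the real area is a set of virtual aliases established with cea_set_pte(), with read-only protections where noted in the struct comments.
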
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bf6a76202a77..ea9a7dde62e5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
135 set_bit(bit, (unsigned long *)cpu_caps_set); \ 135 set_bit(bit, (unsigned long *)cpu_caps_set); \
136} while (0) 136} while (0)
137 137
138#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
139
138#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) 140#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
139/* 141/*
140 * Static testing of CPU features. Used the same as boot_cpu_has(). 142 * Static testing of CPU features. Used the same as boot_cpu_has().
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4011cb03ef08..ec8be07c0cda 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -7,6 +7,7 @@
7#include <asm/mmu.h> 7#include <asm/mmu.h>
8#include <asm/fixmap.h> 8#include <asm/fixmap.h>
9#include <asm/irq_vectors.h> 9#include <asm/irq_vectors.h>
10#include <asm/cpu_entry_area.h>
10 11
11#include <linux/smp.h> 12#include <linux/smp.h>
12#include <linux/percpu.h> 13#include <linux/percpu.h>
@@ -60,17 +61,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
60 return this_cpu_ptr(&gdt_page)->gdt; 61 return this_cpu_ptr(&gdt_page)->gdt;
61} 62}
62 63
63/* Get the fixmap index for a specific processor */
64static inline unsigned int get_cpu_gdt_ro_index(int cpu)
65{
66 return FIX_GDT_REMAP_BEGIN + cpu;
67}
68
69/* Provide the fixmap address of the remapped GDT */ 64/* Provide the fixmap address of the remapped GDT */
70static inline struct desc_struct *get_cpu_gdt_ro(int cpu) 65static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
71{ 66{
72 unsigned int idx = get_cpu_gdt_ro_index(cpu); 67 return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
73 return (struct desc_struct *)__fix_to_virt(idx);
74} 68}
75 69
76/* Provide the current read-only GDT */ 70/* Provide the current read-only GDT */
@@ -185,7 +179,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
185#endif 179#endif
186} 180}
187 181
188static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) 182static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
189{ 183{
190 struct desc_struct *d = get_cpu_gdt_rw(cpu); 184 struct desc_struct *d = get_cpu_gdt_rw(cpu);
191 tss_desc tss; 185 tss_desc tss;
diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
index 0211029076ea..6777480d8a42 100644
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -2,7 +2,7 @@
2#ifndef _ASM_X86_ESPFIX_H 2#ifndef _ASM_X86_ESPFIX_H
3#define _ASM_X86_ESPFIX_H 3#define _ASM_X86_ESPFIX_H
4 4
5#ifdef CONFIG_X86_64 5#ifdef CONFIG_X86_ESPFIX64
6 6
7#include <asm/percpu.h> 7#include <asm/percpu.h>
8 8
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
11 11
12extern void init_espfix_bsp(void); 12extern void init_espfix_bsp(void);
13extern void init_espfix_ap(int cpu); 13extern void init_espfix_ap(int cpu);
14 14#else
15#endif /* CONFIG_X86_64 */ 15static inline void init_espfix_ap(int cpu) { }
16#endif
16 17
17#endif /* _ASM_X86_ESPFIX_H */ 18#endif /* _ASM_X86_ESPFIX_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index b0c505fe9a95..64c4a30e0d39 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP;
44 PAGE_SIZE) 44 PAGE_SIZE)
45#endif 45#endif
46 46
47
48/* 47/*
49 * Here we define all the compile-time 'special' virtual 48 * Here we define all the compile-time 'special' virtual
50 * addresses. The point is to have a constant address at 49 * addresses. The point is to have a constant address at
@@ -84,7 +83,6 @@ enum fixed_addresses {
84 FIX_IO_APIC_BASE_0, 83 FIX_IO_APIC_BASE_0,
85 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, 84 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
86#endif 85#endif
87 FIX_RO_IDT, /* Virtual mapping for read-only IDT */
88#ifdef CONFIG_X86_32 86#ifdef CONFIG_X86_32
89 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ 87 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
90 FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, 88 FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -100,9 +98,6 @@ enum fixed_addresses {
100#ifdef CONFIG_X86_INTEL_MID 98#ifdef CONFIG_X86_INTEL_MID
101 FIX_LNW_VRTC, 99 FIX_LNW_VRTC,
102#endif 100#endif
103 /* Fixmap entries to remap the GDTs, one per processor. */
104 FIX_GDT_REMAP_BEGIN,
105 FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
106 101
107#ifdef CONFIG_ACPI_APEI_GHES 102#ifdef CONFIG_ACPI_APEI_GHES
108 /* Used for GHES mapping from assorted contexts */ 103 /* Used for GHES mapping from assorted contexts */
@@ -143,7 +138,7 @@ enum fixed_addresses {
143extern void reserve_top_address(unsigned long reserve); 138extern void reserve_top_address(unsigned long reserve);
144 139
145#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) 140#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
146#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) 141#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
147 142
148extern int fixmaps_set; 143extern int fixmaps_set;
149 144
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 1b0a5abcd8ae..96aa6b9884dc 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -20,16 +20,7 @@
20#ifndef _ASM_X86_HYPERVISOR_H 20#ifndef _ASM_X86_HYPERVISOR_H
21#define _ASM_X86_HYPERVISOR_H 21#define _ASM_X86_HYPERVISOR_H
22 22
23#ifdef CONFIG_HYPERVISOR_GUEST 23/* x86 hypervisor types */
24
25#include <asm/kvm_para.h>
26#include <asm/x86_init.h>
27#include <asm/xen/hypervisor.h>
28
29/*
30 * x86 hypervisor information
31 */
32
33enum x86_hypervisor_type { 24enum x86_hypervisor_type {
34 X86_HYPER_NATIVE = 0, 25 X86_HYPER_NATIVE = 0,
35 X86_HYPER_VMWARE, 26 X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
39 X86_HYPER_KVM, 30 X86_HYPER_KVM,
40}; 31};
41 32
33#ifdef CONFIG_HYPERVISOR_GUEST
34
35#include <asm/kvm_para.h>
36#include <asm/x86_init.h>
37#include <asm/xen/hypervisor.h>
38
42struct hypervisor_x86 { 39struct hypervisor_x86 {
43 /* Hypervisor name */ 40 /* Hypervisor name */
44 const char *name; 41 const char *name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
58 55
59extern enum x86_hypervisor_type x86_hyper_type; 56extern enum x86_hypervisor_type x86_hyper_type;
60extern void init_hypervisor_platform(void); 57extern void init_hypervisor_platform(void);
58static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
59{
60 return x86_hyper_type == type;
61}
61#else 62#else
62static inline void init_hypervisor_platform(void) { } 63static inline void init_hypervisor_platform(void) { }
64static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
65{
66 return type == X86_HYPER_NATIVE;
67}
63#endif /* CONFIG_HYPERVISOR_GUEST */ 68#endif /* CONFIG_HYPERVISOR_GUEST */
64#endif /* _ASM_X86_HYPERVISOR_H */ 69#endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
new file mode 100644
index 000000000000..989cfa86de85
--- /dev/null
+++ b/arch/x86/include/asm/invpcid.h
@@ -0,0 +1,53 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _ASM_X86_INVPCID
3#define _ASM_X86_INVPCID
4
5static inline void __invpcid(unsigned long pcid, unsigned long addr,
6 unsigned long type)
7{
8 struct { u64 d[2]; } desc = { { pcid, addr } };
9
10 /*
11 * The memory clobber is because the whole point is to invalidate
12 * stale TLB entries and, especially if we're flushing global
13 * mappings, we don't want the compiler to reorder any subsequent
14 * memory accesses before the TLB flush.
15 *
16 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
17 * invpcid (%rcx), %rax in long mode.
18 */
19 asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
20 : : "m" (desc), "a" (type), "c" (&desc) : "memory");
21}
22
23#define INVPCID_TYPE_INDIV_ADDR 0
24#define INVPCID_TYPE_SINGLE_CTXT 1
25#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
26#define INVPCID_TYPE_ALL_NON_GLOBAL 3
27
28/* Flush all mappings for a given pcid and addr, not including globals. */
29static inline void invpcid_flush_one(unsigned long pcid,
30 unsigned long addr)
31{
32 __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
33}
34
35/* Flush all mappings for a given PCID, not including globals. */
36static inline void invpcid_flush_single_context(unsigned long pcid)
37{
38 __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
39}
40
41/* Flush all mappings, including globals, for all PCIDs. */
42static inline void invpcid_flush_all(void)
43{
44 __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
45}
46
47/* Flush all mappings for all PCIDs except globals. */
48static inline void invpcid_flush_all_nonglobals(void)
49{
50 __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
51}
52
53#endif /* _ASM_X86_INVPCID */
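
INVPCID takes a register operand selecting the invalidation type and a 16-byte memory descriptor holding the PCID and the linear address. Since the instruction is privileged, the following sketch only builds and prints the descriptor that __invpcid() hands over via %rcx (with the type in %eax); it never executes the invalidation, and the address is made up:

    #include <stdio.h>
    #include <stdint.h>

    #define INVPCID_TYPE_INDIV_ADDR       0
    #define INVPCID_TYPE_SINGLE_CTXT      1
    #define INVPCID_TYPE_ALL_INCL_GLOBAL  2
    #define INVPCID_TYPE_ALL_NON_GLOBAL   3

    struct invpcid_desc {
        uint64_t d[2];      /* d[0] = PCID (low 12 bits), d[1] = linear address */
    };

    int main(void)
    {
        struct invpcid_desc desc = {
            .d = { 5, 0x7f0000001000ULL },   /* PCID 5, made-up address */
        };

        printf("descriptor: pcid=%#llx addr=%#llx\n",
               (unsigned long long)desc.d[0], (unsigned long long)desc.d[1]);
        printf("type passed in the register operand: %d (single address)\n",
               INVPCID_TYPE_INDIV_ADDR);
        return 0;
    }
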
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c8ef23f2c28f..89f08955fff7 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
142 swapgs; \ 142 swapgs; \
143 sysretl 143 sysretl
144 144
145#ifdef CONFIG_DEBUG_ENTRY
146#define SAVE_FLAGS(x) pushfq; popq %rax
147#endif
145#else 148#else
146#define INTERRUPT_RETURN iret 149#define INTERRUPT_RETURN iret
147#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit 150#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index f86a8caa561e..395c9631e000 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
26extern int __must_check __die(const char *, struct pt_regs *, long); 26extern int __must_check __die(const char *, struct pt_regs *, long);
27extern void show_stack_regs(struct pt_regs *regs); 27extern void show_stack_regs(struct pt_regs *regs);
28extern void __show_regs(struct pt_regs *regs, int all); 28extern void __show_regs(struct pt_regs *regs, int all);
29extern void show_iret_regs(struct pt_regs *regs);
29extern unsigned long oops_begin(void); 30extern unsigned long oops_begin(void);
30extern void oops_end(unsigned long, struct pt_regs *, int signr); 31extern void oops_end(unsigned long, struct pt_regs *, int signr);
31 32
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 9ea26f167497..5ff3e8af2c20 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,6 +3,7 @@
3#define _ASM_X86_MMU_H 3#define _ASM_X86_MMU_H
4 4
5#include <linux/spinlock.h> 5#include <linux/spinlock.h>
6#include <linux/rwsem.h>
6#include <linux/mutex.h> 7#include <linux/mutex.h>
7#include <linux/atomic.h> 8#include <linux/atomic.h>
8 9
@@ -27,7 +28,8 @@ typedef struct {
27 atomic64_t tlb_gen; 28 atomic64_t tlb_gen;
28 29
29#ifdef CONFIG_MODIFY_LDT_SYSCALL 30#ifdef CONFIG_MODIFY_LDT_SYSCALL
30 struct ldt_struct *ldt; 31 struct rw_semaphore ldt_usr_sem;
32 struct ldt_struct *ldt;
31#endif 33#endif
32 34
33#ifdef CONFIG_X86_64 35#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 6d16d15d09a0..5ede7cae1d67 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -57,11 +57,17 @@ struct ldt_struct {
57/* 57/*
58 * Used for LDT copy/destruction. 58 * Used for LDT copy/destruction.
59 */ 59 */
60int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm); 60static inline void init_new_context_ldt(struct mm_struct *mm)
61{
62 mm->context.ldt = NULL;
63 init_rwsem(&mm->context.ldt_usr_sem);
64}
65int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
61void destroy_context_ldt(struct mm_struct *mm); 66void destroy_context_ldt(struct mm_struct *mm);
62#else /* CONFIG_MODIFY_LDT_SYSCALL */ 67#else /* CONFIG_MODIFY_LDT_SYSCALL */
63static inline int init_new_context_ldt(struct task_struct *tsk, 68static inline void init_new_context_ldt(struct mm_struct *mm) { }
64 struct mm_struct *mm) 69static inline int ldt_dup_context(struct mm_struct *oldmm,
70 struct mm_struct *mm)
65{ 71{
66 return 0; 72 return 0;
67} 73}
@@ -132,18 +138,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
132static inline int init_new_context(struct task_struct *tsk, 138static inline int init_new_context(struct task_struct *tsk,
133 struct mm_struct *mm) 139 struct mm_struct *mm)
134{ 140{
141 mutex_init(&mm->context.lock);
142
135 mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); 143 mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
136 atomic64_set(&mm->context.tlb_gen, 0); 144 atomic64_set(&mm->context.tlb_gen, 0);
137 145
138 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS 146#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
139 if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { 147 if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
140 /* pkey 0 is the default and always allocated */ 148 /* pkey 0 is the default and always allocated */
141 mm->context.pkey_allocation_map = 0x1; 149 mm->context.pkey_allocation_map = 0x1;
142 /* -1 means unallocated or invalid */ 150 /* -1 means unallocated or invalid */
143 mm->context.execute_only_pkey = -1; 151 mm->context.execute_only_pkey = -1;
144 } 152 }
145 #endif 153#endif
146 return init_new_context_ldt(tsk, mm); 154 init_new_context_ldt(mm);
155 return 0;
147} 156}
148static inline void destroy_context(struct mm_struct *mm) 157static inline void destroy_context(struct mm_struct *mm)
149{ 158{
@@ -176,10 +185,10 @@ do { \
176} while (0) 185} while (0)
177#endif 186#endif
178 187
179static inline void arch_dup_mmap(struct mm_struct *oldmm, 188static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
180 struct mm_struct *mm)
181{ 189{
182 paravirt_arch_dup_mmap(oldmm, mm); 190 paravirt_arch_dup_mmap(oldmm, mm);
191 return ldt_dup_context(oldmm, mm);
183} 192}
184 193
185static inline void arch_exit_mmap(struct mm_struct *mm) 194static inline void arch_exit_mmap(struct mm_struct *mm)
@@ -282,33 +291,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
282} 291}
283 292
284/* 293/*
285 * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
286 * bits. This serves two purposes. It prevents a nasty situation in
287 * which PCID-unaware code saves CR3, loads some other value (with PCID
288 * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
289 * the saved ASID was nonzero. It also means that any bugs involving
290 * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
291 * deterministically.
292 */
293
294static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
295{
296 if (static_cpu_has(X86_FEATURE_PCID)) {
297 VM_WARN_ON_ONCE(asid > 4094);
298 return __sme_pa(mm->pgd) | (asid + 1);
299 } else {
300 VM_WARN_ON_ONCE(asid != 0);
301 return __sme_pa(mm->pgd);
302 }
303}
304
305static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
306{
307 VM_WARN_ON_ONCE(asid > 4094);
308 return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
309}
310
311/*
312 * This can be used from process context to figure out what the value of 294 * This can be used from process context to figure out what the value of
313 * CR3 is without needing to do a (slow) __read_cr3(). 295 * CR3 is without needing to do a (slow) __read_cr3().
314 * 296 *
@@ -317,7 +299,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
317 */ 299 */
318static inline unsigned long __get_current_cr3_fast(void) 300static inline unsigned long __get_current_cr3_fast(void)
319{ 301{
320 unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm), 302 unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
321 this_cpu_read(cpu_tlbstate.loaded_mm_asid)); 303 this_cpu_read(cpu_tlbstate.loaded_mm_asid));
322 304
323 /* For now, be very restrictive about when this can be called. */ 305 /* For now, be very restrictive about when this can be called. */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 283efcaac8af..892df375b615 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -927,6 +927,15 @@ extern void default_banner(void);
927 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ 927 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
928 CLBR_NONE, \ 928 CLBR_NONE, \
929 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) 929 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
930
931#ifdef CONFIG_DEBUG_ENTRY
932#define SAVE_FLAGS(clobbers) \
933 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
934 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
935 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
936 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
937#endif
938
930#endif /* CONFIG_X86_32 */ 939#endif /* CONFIG_X86_32 */
931 940
932#endif /* __ASSEMBLY__ */ 941#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index f2ca9b28fd68..ce245b0cdfca 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
38#define LAST_PKMAP 1024 38#define LAST_PKMAP 1024
39#endif 39#endif
40 40
41#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ 41/*
42 & PMD_MASK) 42 * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
43 * to avoid include recursion hell
44 */
45#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
46
47#define CPU_ENTRY_AREA_BASE \
48 ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
49
50#define PKMAP_BASE \
51 ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
43 52
44#ifdef CONFIG_HIGHMEM 53#ifdef CONFIG_HIGHMEM
45# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) 54# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
46#else 55#else
47# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) 56# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
48#endif 57#endif
49 58
50#define MODULES_VADDR VMALLOC_START 59#define MODULES_VADDR VMALLOC_START
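
The 32-bit layout now carves the cpu_entry_area out of the space just below the fixmap and pushes PKMAP_BASE down accordingly. A stand-alone sketch of that arithmetic, with an assumed FIXADDR_START and NR_CPUS and a 4 MiB (non-PAE) PMD; the kernel derives all of these from the configuration:

    #include <stdio.h>

    #define PAGE_SIZE             4096u
    #define PMD_SIZE              (4u << 20)        /* 4 MiB, non-PAE */
    #define PMD_MASK              (~(PMD_SIZE - 1))
    #define NR_CPUS               8u                /* assumed */
    #define FIXADDR_START         0xffd00000u       /* assumed */

    #define CPU_ENTRY_AREA_PAGES  (NR_CPUS * 40u)
    #define CPU_ENTRY_AREA_BASE   \
        ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
    #define PKMAP_BASE            ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)

    int main(void)
    {
        printf("CPU_ENTRY_AREA_PAGES = %u\n", CPU_ENTRY_AREA_PAGES);
        printf("CPU_ENTRY_AREA_BASE  = %#x\n", (unsigned int)CPU_ENTRY_AREA_BASE);
        printf("PKMAP_BASE           = %#x\n", (unsigned int)PKMAP_BASE);
        return 0;
    }
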
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6d5f45dcd4a1..3d27831bc58d 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t;
76#define PGDIR_MASK (~(PGDIR_SIZE - 1)) 76#define PGDIR_MASK (~(PGDIR_SIZE - 1))
77 77
78/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ 78/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
79#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) 79#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
80
80#ifdef CONFIG_X86_5LEVEL 81#ifdef CONFIG_X86_5LEVEL
81#define VMALLOC_SIZE_TB _AC(16384, UL) 82# define VMALLOC_SIZE_TB _AC(16384, UL)
82#define __VMALLOC_BASE _AC(0xff92000000000000, UL) 83# define __VMALLOC_BASE _AC(0xff92000000000000, UL)
83#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) 84# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
84#else 85#else
85#define VMALLOC_SIZE_TB _AC(32, UL) 86# define VMALLOC_SIZE_TB _AC(32, UL)
86#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) 87# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
87#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) 88# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
88#endif 89#endif
90
89#ifdef CONFIG_RANDOMIZE_MEMORY 91#ifdef CONFIG_RANDOMIZE_MEMORY
90#define VMALLOC_START vmalloc_base 92# define VMALLOC_START vmalloc_base
91#define VMEMMAP_START vmemmap_base 93# define VMEMMAP_START vmemmap_base
92#else 94#else
93#define VMALLOC_START __VMALLOC_BASE 95# define VMALLOC_START __VMALLOC_BASE
94#define VMEMMAP_START __VMEMMAP_BASE 96# define VMEMMAP_START __VMEMMAP_BASE
95#endif /* CONFIG_RANDOMIZE_MEMORY */ 97#endif /* CONFIG_RANDOMIZE_MEMORY */
96#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) 98
97#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) 99#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
100
101#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
 98/* The module sections end with the start of the fixmap */ 102/* The module sections end with the start of the fixmap */
99#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) 103#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
100#define MODULES_LEN (MODULES_END - MODULES_VADDR) 104#define MODULES_LEN (MODULES_END - MODULES_VADDR)
101#define ESPFIX_PGD_ENTRY _AC(-2, UL) 105
102#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) 106#define ESPFIX_PGD_ENTRY _AC(-2, UL)
103#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) 107#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
104#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) 108
109#define CPU_ENTRY_AREA_PGD _AC(-3, UL)
110#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
111
112#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
113#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
105 114
106#define EARLY_DYNAMIC_PAGE_TABLES 64 115#define EARLY_DYNAMIC_PAGE_TABLES 64
107 116
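
Both ESPFIX_BASE_ADDR and the new CPU_ENTRY_AREA_BASE are negative PGD slots shifted up into the canonical high half of the address space. A quick check of the resulting addresses, assuming 4-level paging (P4D_SHIFT == 39); with 5-level paging the shift and the addresses differ:

    #include <stdio.h>

    #define P4D_SHIFT  39        /* 4-level paging */

    int main(void)
    {
        unsigned long long espfix_base = (unsigned long long)-2 << P4D_SHIFT;
        unsigned long long cea_base    = (unsigned long long)-3 << P4D_SHIFT;

        printf("ESPFIX_BASE_ADDR    = %#llx\n", espfix_base);   /* 0xffffff0000000000 */
        printf("CPU_ENTRY_AREA_BASE = %#llx\n", cea_base);      /* 0xfffffe8000000000 */
        return 0;
    }
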
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cc16fa882e3e..cad8dab266bc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
163extern struct cpuinfo_x86 boot_cpu_data; 163extern struct cpuinfo_x86 boot_cpu_data;
164extern struct cpuinfo_x86 new_cpu_data; 164extern struct cpuinfo_x86 new_cpu_data;
165 165
166extern struct tss_struct doublefault_tss; 166extern struct x86_hw_tss doublefault_tss;
167extern __u32 cpu_caps_cleared[NCAPINTS]; 167extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
168extern __u32 cpu_caps_set[NCAPINTS]; 168extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
169 169
170#ifdef CONFIG_SMP 170#ifdef CONFIG_SMP
171DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); 171DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
253 write_cr3(__sme_pa(pgdir)); 253 write_cr3(__sme_pa(pgdir));
254} 254}
255 255
256/*
257 * Note that while the legacy 'TSS' name comes from 'Task State Segment',
258 * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
259 * unrelated to the task-switch mechanism:
260 */
256#ifdef CONFIG_X86_32 261#ifdef CONFIG_X86_32
257/* This is the TSS defined by the hardware. */ 262/* This is the TSS defined by the hardware. */
258struct x86_hw_tss { 263struct x86_hw_tss {
@@ -305,7 +310,13 @@ struct x86_hw_tss {
305struct x86_hw_tss { 310struct x86_hw_tss {
306 u32 reserved1; 311 u32 reserved1;
307 u64 sp0; 312 u64 sp0;
313
314 /*
315 * We store cpu_current_top_of_stack in sp1 so it's always accessible.
316 * Linux does not use ring 1, so sp1 is not otherwise needed.
317 */
308 u64 sp1; 318 u64 sp1;
319
309 u64 sp2; 320 u64 sp2;
310 u64 reserved2; 321 u64 reserved2;
311 u64 ist[7]; 322 u64 ist[7];
@@ -323,12 +334,22 @@ struct x86_hw_tss {
323#define IO_BITMAP_BITS 65536 334#define IO_BITMAP_BITS 65536
324#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) 335#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
325#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) 336#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
326#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) 337#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
327#define INVALID_IO_BITMAP_OFFSET 0x8000 338#define INVALID_IO_BITMAP_OFFSET 0x8000
328 339
340struct entry_stack {
341 unsigned long words[64];
342};
343
344struct entry_stack_page {
345 struct entry_stack stack;
346} __aligned(PAGE_SIZE);
347
329struct tss_struct { 348struct tss_struct {
330 /* 349 /*
331 * The hardware state: 350 * The fixed hardware portion. This must not cross a page boundary
351 * at risk of violating the SDM's advice and potentially triggering
352 * errata.
332 */ 353 */
333 struct x86_hw_tss x86_tss; 354 struct x86_hw_tss x86_tss;
334 355
@@ -339,18 +360,9 @@ struct tss_struct {
339 * be within the limit. 360 * be within the limit.
340 */ 361 */
341 unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; 362 unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
363} __aligned(PAGE_SIZE);
342 364
343#ifdef CONFIG_X86_32 365DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
344 /*
345 * Space for the temporary SYSENTER stack.
346 */
347 unsigned long SYSENTER_stack_canary;
348 unsigned long SYSENTER_stack[64];
349#endif
350
351} ____cacheline_aligned;
352
353DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
354 366
355/* 367/*
356 * sizeof(unsigned long) coming from an extra "long" at the end 368 * sizeof(unsigned long) coming from an extra "long" at the end
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
364 376
365#ifdef CONFIG_X86_32 377#ifdef CONFIG_X86_32
366DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); 378DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
379#else
380/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
381#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
367#endif 382#endif
368 383
369/* 384/*
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
523static inline void 538static inline void
524native_load_sp0(unsigned long sp0) 539native_load_sp0(unsigned long sp0)
525{ 540{
526 this_cpu_write(cpu_tss.x86_tss.sp0, sp0); 541 this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
527} 542}
528 543
529static inline void native_swapgs(void) 544static inline void native_swapgs(void)
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
535 550
536static inline unsigned long current_top_of_stack(void) 551static inline unsigned long current_top_of_stack(void)
537{ 552{
538#ifdef CONFIG_X86_64 553 /*
539 return this_cpu_read_stable(cpu_tss.x86_tss.sp0); 554 * We can't read directly from tss.sp0: sp0 on x86_32 is special in
540#else 555 * and around vm86 mode and sp0 on x86_64 is special because of the
541 /* sp0 on x86_32 is special in and around vm86 mode. */ 556 * entry trampoline.
557 */
542 return this_cpu_read_stable(cpu_current_top_of_stack); 558 return this_cpu_read_stable(cpu_current_top_of_stack);
543#endif
544} 559}
545 560
546static inline bool on_thread_stack(void) 561static inline bool on_thread_stack(void)
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 8da111b3c342..f73706878772 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -16,6 +16,7 @@ enum stack_type {
16 STACK_TYPE_TASK, 16 STACK_TYPE_TASK,
17 STACK_TYPE_IRQ, 17 STACK_TYPE_IRQ,
18 STACK_TYPE_SOFTIRQ, 18 STACK_TYPE_SOFTIRQ,
19 STACK_TYPE_ENTRY,
19 STACK_TYPE_EXCEPTION, 20 STACK_TYPE_EXCEPTION,
20 STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, 21 STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
21}; 22};
@@ -28,6 +29,8 @@ struct stack_info {
28bool in_task_stack(unsigned long *stack, struct task_struct *task, 29bool in_task_stack(unsigned long *stack, struct task_struct *task,
29 struct stack_info *info); 30 struct stack_info *info);
30 31
32bool in_entry_stack(unsigned long *stack, struct stack_info *info);
33
31int get_stack_info(unsigned long *stack, struct task_struct *task, 34int get_stack_info(unsigned long *stack, struct task_struct *task,
32 struct stack_info *info, unsigned long *visit_mask); 35 struct stack_info *info, unsigned long *visit_mask);
33 36
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8c6bd6863db9..9b6df68d8fd1 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -79,10 +79,10 @@ do { \
79static inline void refresh_sysenter_cs(struct thread_struct *thread) 79static inline void refresh_sysenter_cs(struct thread_struct *thread)
80{ 80{
81 /* Only happens when SEP is enabled, no need to test "SEP"arately: */ 81 /* Only happens when SEP is enabled, no need to test "SEP"arately: */
82 if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs)) 82 if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
83 return; 83 return;
84 84
85 this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs); 85 this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
86 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); 86 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
87} 87}
88#endif 88#endif
@@ -90,10 +90,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
90/* This is used when switching tasks or entering/exiting vm86 mode. */ 90/* This is used when switching tasks or entering/exiting vm86 mode. */
91static inline void update_sp0(struct task_struct *task) 91static inline void update_sp0(struct task_struct *task)
92{ 92{
93 /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
93#ifdef CONFIG_X86_32 94#ifdef CONFIG_X86_32
94 load_sp0(task->thread.sp0); 95 load_sp0(task->thread.sp0);
95#else 96#else
96 load_sp0(task_top_of_stack(task)); 97 if (static_cpu_has(X86_FEATURE_XENPV))
98 load_sp0(task_top_of_stack(task));
97#endif 99#endif
98} 100}
99 101
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 70f425947dc5..00223333821a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
207#else /* !__ASSEMBLY__ */ 207#else /* !__ASSEMBLY__ */
208 208
209#ifdef CONFIG_X86_64 209#ifdef CONFIG_X86_64
210# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) 210# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
211#endif 211#endif
212 212
213#endif 213#endif
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 877b5c1a1b12..e1884cf35257 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -9,70 +9,66 @@
9#include <asm/cpufeature.h> 9#include <asm/cpufeature.h>
10#include <asm/special_insns.h> 10#include <asm/special_insns.h>
11#include <asm/smp.h> 11#include <asm/smp.h>
12#include <asm/invpcid.h>
12 13
13static inline void __invpcid(unsigned long pcid, unsigned long addr, 14static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
14 unsigned long type)
15{ 15{
16 struct { u64 d[2]; } desc = { { pcid, addr } };
17
18 /* 16 /*
19 * The memory clobber is because the whole point is to invalidate 17 * Bump the generation count. This also serves as a full barrier
20 * stale TLB entries and, especially if we're flushing global 18 * that synchronizes with switch_mm(): callers are required to order
21 * mappings, we don't want the compiler to reorder any subsequent 19 * their read of mm_cpumask after their writes to the paging
22 * memory accesses before the TLB flush. 20 * structures.
23 *
24 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
25 * invpcid (%rcx), %rax in long mode.
26 */ 21 */
27 asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" 22 return atomic64_inc_return(&mm->context.tlb_gen);
28 : : "m" (desc), "a" (type), "c" (&desc) : "memory");
29} 23}
30 24
31#define INVPCID_TYPE_INDIV_ADDR 0 25/* There are 12 bits of space for ASIDS in CR3 */
32#define INVPCID_TYPE_SINGLE_CTXT 1 26#define CR3_HW_ASID_BITS 12
33#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 27/*
34#define INVPCID_TYPE_ALL_NON_GLOBAL 3 28 * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
29 * user/kernel switches
30 */
31#define PTI_CONSUMED_ASID_BITS 0
35 32
36/* Flush all mappings for a given pcid and addr, not including globals. */ 33#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
37static inline void invpcid_flush_one(unsigned long pcid, 34/*
38 unsigned long addr) 35 * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
39{ 36 * for them being zero-based. Another -1 is because ASID 0 is reserved for
40 __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); 37 * use by non-PCID-aware users.
41} 38 */
39#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
42 40
43/* Flush all mappings for a given PCID, not including globals. */ 41static inline u16 kern_pcid(u16 asid)
44static inline void invpcid_flush_single_context(unsigned long pcid)
45{ 42{
46 __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); 43 VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
44 /*
45 * If PCID is on, ASID-aware code paths put the ASID+1 into the
46 * PCID bits. This serves two purposes. It prevents a nasty
47 * situation in which PCID-unaware code saves CR3, loads some other
48 * value (with PCID == 0), and then restores CR3, thus corrupting
49 * the TLB for ASID 0 if the saved ASID was nonzero. It also means
50 * that any bugs involving loading a PCID-enabled CR3 with
51 * CR4.PCIDE off will trigger deterministically.
52 */
53 return asid + 1;
47} 54}
48 55
49/* Flush all mappings, including globals, for all PCIDs. */ 56struct pgd_t;
50static inline void invpcid_flush_all(void) 57static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
51{ 58{
52 __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); 59 if (static_cpu_has(X86_FEATURE_PCID)) {
60 return __sme_pa(pgd) | kern_pcid(asid);
61 } else {
62 VM_WARN_ON_ONCE(asid != 0);
63 return __sme_pa(pgd);
64 }
53} 65}
54 66
55/* Flush all mappings for all PCIDs except globals. */ 67static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
56static inline void invpcid_flush_all_nonglobals(void)
57{ 68{
58 __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); 69 VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
59} 70 VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
60 71 return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
61static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
62{
63 u64 new_tlb_gen;
64
65 /*
66 * Bump the generation count. This also serves as a full barrier
67 * that synchronizes with switch_mm(): callers are required to order
68 * their read of mm_cpumask after their writes to the paging
69 * structures.
70 */
71 smp_mb__before_atomic();
72 new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
73 smp_mb__after_atomic();
74
75 return new_tlb_gen;
76} 72}
77 73
78#ifdef CONFIG_PARAVIRT 74#ifdef CONFIG_PARAVIRT
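
build_cr3() and build_cr3_noflush() pack three things into the CR3 value: the physical address of the page-table root, the hardware PCID (ASID + 1, per kern_pcid()), and bit 63 as the "do not flush" hint. A user-space sketch of that packing, with a made-up pgd physical address and without the __sme_pa() encryption-mask handling:

    #include <stdio.h>
    #include <stdint.h>

    #define CR3_NOFLUSH       (1ULL << 63)
    #define CR3_HW_ASID_BITS  12

    static uint64_t sketch_build_cr3(uint64_t pgd_pa, uint16_t asid)
    {
        return pgd_pa | (uint64_t)(asid + 1);   /* kern_pcid(): ASID 0 becomes PCID 1 */
    }

    static uint64_t sketch_build_cr3_noflush(uint64_t pgd_pa, uint16_t asid)
    {
        return sketch_build_cr3(pgd_pa, asid) | CR3_NOFLUSH;
    }

    int main(void)
    {
        uint64_t pgd_pa = 0x1234000;            /* made-up, page-aligned physical address */
        uint64_t cr3    = sketch_build_cr3_noflush(pgd_pa, 5);

        printf("build_cr3(pgd, 0)          = %#llx\n",
               (unsigned long long)sketch_build_cr3(pgd_pa, 0));
        printf("build_cr3_noflush(pgd, 5)  = %#llx\n", (unsigned long long)cr3);
        printf("PCID field                 = %llu\n",
               (unsigned long long)(cr3 & ((1ULL << CR3_HW_ASID_BITS) - 1)));
        return 0;
    }

When PCID is not available, build_cr3() above falls back to the bare physical address, which is why the ASID must then be 0.
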
@@ -237,6 +233,9 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
237 233
238extern void initialize_tlbstate_and_flush(void); 234extern void initialize_tlbstate_and_flush(void);
239 235
236/*
237 * flush the entire current user mapping
238 */
240static inline void __native_flush_tlb(void) 239static inline void __native_flush_tlb(void)
241{ 240{
242 /* 241 /*
@@ -249,20 +248,12 @@ static inline void __native_flush_tlb(void)
249 preempt_enable(); 248 preempt_enable();
250} 249}
251 250
252static inline void __native_flush_tlb_global_irq_disabled(void) 251/*
253{ 252 * flush everything
254 unsigned long cr4; 253 */
255
256 cr4 = this_cpu_read(cpu_tlbstate.cr4);
257 /* clear PGE */
258 native_write_cr4(cr4 & ~X86_CR4_PGE);
259 /* write old PGE again and flush TLBs */
260 native_write_cr4(cr4);
261}
262
263static inline void __native_flush_tlb_global(void) 254static inline void __native_flush_tlb_global(void)
264{ 255{
265 unsigned long flags; 256 unsigned long cr4, flags;
266 257
267 if (static_cpu_has(X86_FEATURE_INVPCID)) { 258 if (static_cpu_has(X86_FEATURE_INVPCID)) {
268 /* 259 /*
@@ -280,22 +271,36 @@ static inline void __native_flush_tlb_global(void)
280 */ 271 */
281 raw_local_irq_save(flags); 272 raw_local_irq_save(flags);
282 273
283 __native_flush_tlb_global_irq_disabled(); 274 cr4 = this_cpu_read(cpu_tlbstate.cr4);
275 /* toggle PGE */
276 native_write_cr4(cr4 ^ X86_CR4_PGE);
277 /* write old PGE again and flush TLBs */
278 native_write_cr4(cr4);
284 279
285 raw_local_irq_restore(flags); 280 raw_local_irq_restore(flags);
286} 281}
287 282
283/*
284 * flush one page in the user mapping
285 */
288static inline void __native_flush_tlb_single(unsigned long addr) 286static inline void __native_flush_tlb_single(unsigned long addr)
289{ 287{
290 asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); 288 asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
291} 289}
292 290
291/*
292 * flush everything
293 */
293static inline void __flush_tlb_all(void) 294static inline void __flush_tlb_all(void)
294{ 295{
295 if (boot_cpu_has(X86_FEATURE_PGE)) 296 if (boot_cpu_has(X86_FEATURE_PGE)) {
296 __flush_tlb_global(); 297 __flush_tlb_global();
297 else 298 } else {
299 /*
300 * !PGE -> !PCID (setup_pcid()), thus every flush is total.
301 */
298 __flush_tlb(); 302 __flush_tlb();
303 }
299 304
300 /* 305 /*
301 * Note: if we somehow had PCID but not PGE, then this wouldn't work -- 306 * Note: if we somehow had PCID but not PGE, then this wouldn't work --
@@ -306,6 +311,9 @@ static inline void __flush_tlb_all(void)
306 */ 311 */
307} 312}
308 313
314/*
315 * flush one page in the kernel mapping
316 */
309static inline void __flush_tlb_one(unsigned long addr) 317static inline void __flush_tlb_one(unsigned long addr)
310{ 318{
311 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); 319 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 1fadd310ff68..31051f35cbb7 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
75dotraplinkage void do_stack_segment(struct pt_regs *, long); 75dotraplinkage void do_stack_segment(struct pt_regs *, long);
76#ifdef CONFIG_X86_64 76#ifdef CONFIG_X86_64
77dotraplinkage void do_double_fault(struct pt_regs *, long); 77dotraplinkage void do_double_fault(struct pt_regs *, long);
78asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
79#endif 78#endif
80dotraplinkage void do_general_protection(struct pt_regs *, long); 79dotraplinkage void do_general_protection(struct pt_regs *, long);
81dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); 80dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index e9cc6fe1fc6f..c1688c2d0a12 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -7,6 +7,9 @@
7#include <asm/ptrace.h> 7#include <asm/ptrace.h>
8#include <asm/stacktrace.h> 8#include <asm/stacktrace.h>
9 9
10#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
11#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
12
10struct unwind_state { 13struct unwind_state {
11 struct stack_info stack_info; 14 struct stack_info stack_info;
12 unsigned long stack_mask; 15 unsigned long stack_mask;
@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
52} 55}
53 56
54#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) 57#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
58/*
59 * WARNING: The entire pt_regs may not be safe to dereference. In some cases,
60 * only the iret frame registers are accessible. Use with caution!
61 */
55static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) 62static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
56{ 63{
57 if (unwind_done(state)) 64 if (unwind_done(state))
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 8ea78275480d..676b7cf4b62b 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -93,4 +93,10 @@ void common(void) {
93 93
94 BLANK(); 94 BLANK();
95 DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); 95 DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
96
97 /* Layout info for cpu_entry_area */
98 OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
99 OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
100 OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
101 DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
96} 102}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index dedf428b20b6..fa1261eefa16 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -47,13 +47,8 @@ void foo(void)
47 BLANK(); 47 BLANK();
48 48
49 /* Offset from the sysenter stack to tss.sp0 */ 49 /* Offset from the sysenter stack to tss.sp0 */
50 DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - 50 DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
51 offsetofend(struct tss_struct, SYSENTER_stack)); 51 offsetofend(struct cpu_entry_area, entry_stack_page.stack));
52
53 /* Offset from cpu_tss to SYSENTER_stack */
54 OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
55 /* Size of SYSENTER_stack */
56 DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
57 52
58#ifdef CONFIG_CC_STACKPROTECTOR 53#ifdef CONFIG_CC_STACKPROTECTOR
59 BLANK(); 54 BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 630212fa9b9d..bf51e51d808d 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -23,6 +23,9 @@ int main(void)
23#ifdef CONFIG_PARAVIRT 23#ifdef CONFIG_PARAVIRT
24 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); 24 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
25 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); 25 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
26#ifdef CONFIG_DEBUG_ENTRY
27 OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
28#endif
26 BLANK(); 29 BLANK();
27#endif 30#endif
28 31
@@ -63,6 +66,7 @@ int main(void)
63 66
64 OFFSET(TSS_ist, tss_struct, x86_tss.ist); 67 OFFSET(TSS_ist, tss_struct, x86_tss.ist);
65 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); 68 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
69 OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
66 BLANK(); 70 BLANK();
67 71
68#ifdef CONFIG_CC_STACKPROTECTOR 72#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fa998ca8aa5a..c9757f07d738 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
476 return NULL; /* Not found */ 476 return NULL; /* Not found */
477} 477}
478 478
479__u32 cpu_caps_cleared[NCAPINTS]; 479__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
480__u32 cpu_caps_set[NCAPINTS]; 480__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
481 481
482void load_percpu_segment(int cpu) 482void load_percpu_segment(int cpu)
483{ 483{
@@ -490,28 +490,23 @@ void load_percpu_segment(int cpu)
490 load_stack_canary_segment(); 490 load_stack_canary_segment();
491} 491}
492 492
493/* Setup the fixmap mapping only once per-processor */ 493#ifdef CONFIG_X86_32
494static inline void setup_fixmap_gdt(int cpu) 494/* The 32-bit entry code needs to find cpu_entry_area. */
495{ 495DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
496#ifdef CONFIG_X86_64
497 /* On 64-bit systems, we use a read-only fixmap GDT. */
498 pgprot_t prot = PAGE_KERNEL_RO;
499#else
500 /*
501 * On native 32-bit systems, the GDT cannot be read-only because
502 * our double fault handler uses a task gate, and entering through
503 * a task gate needs to change an available TSS to busy. If the GDT
504 * is read-only, that will triple fault.
505 *
506 * On Xen PV, the GDT must be read-only because the hypervisor requires
507 * it.
508 */
509 pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
510 PAGE_KERNEL_RO : PAGE_KERNEL;
511#endif 496#endif
512 497
513 __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot); 498#ifdef CONFIG_X86_64
514} 499/*
500 * Special IST stacks which the CPU switches to when it calls
501 * an IST-marked descriptor entry. Up to 7 stacks (hardware
502 * limit), all of them are 4K, except the debug stack which
503 * is 8K.
504 */
505static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
506 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
507 [DEBUG_STACK - 1] = DEBUG_STKSZ
508};
509#endif
515 510
516/* Load the original GDT from the per-cpu structure */ 511/* Load the original GDT from the per-cpu structure */
517void load_direct_gdt(int cpu) 512void load_direct_gdt(int cpu)
@@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
747{ 742{
748 int i; 743 int i;
749 744
750 for (i = 0; i < NCAPINTS; i++) { 745 for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
751 c->x86_capability[i] &= ~cpu_caps_cleared[i]; 746 c->x86_capability[i] &= ~cpu_caps_cleared[i];
752 c->x86_capability[i] |= cpu_caps_set[i]; 747 c->x86_capability[i] |= cpu_caps_set[i];
753 } 748 }
@@ -1250,7 +1245,7 @@ void enable_sep_cpu(void)
1250 return; 1245 return;
1251 1246
1252 cpu = get_cpu(); 1247 cpu = get_cpu();
1253 tss = &per_cpu(cpu_tss, cpu); 1248 tss = &per_cpu(cpu_tss_rw, cpu);
1254 1249
1255 /* 1250 /*
1256 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- 1251 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1254,7 @@ void enable_sep_cpu(void)
1259 1254
1260 tss->x86_tss.ss1 = __KERNEL_CS; 1255 tss->x86_tss.ss1 = __KERNEL_CS;
1261 wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); 1256 wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
1262 1257 wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
1263 wrmsr(MSR_IA32_SYSENTER_ESP,
1264 (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
1265 0);
1266
1267 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); 1258 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
1268 1259
1269 put_cpu(); 1260 put_cpu();
@@ -1357,25 +1348,19 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
1357DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; 1348DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
1358EXPORT_PER_CPU_SYMBOL(__preempt_count); 1349EXPORT_PER_CPU_SYMBOL(__preempt_count);
1359 1350
1360/*
1361 * Special IST stacks which the CPU switches to when it calls
1362 * an IST-marked descriptor entry. Up to 7 stacks (hardware
1363 * limit), all of them are 4K, except the debug stack which
1364 * is 8K.
1365 */
1366static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
1367 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1368 [DEBUG_STACK - 1] = DEBUG_STKSZ
1369};
1370
1371static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
1372 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
1373
1374/* May not be marked __init: used by software suspend */ 1351/* May not be marked __init: used by software suspend */
1375void syscall_init(void) 1352void syscall_init(void)
1376{ 1353{
1354 extern char _entry_trampoline[];
1355 extern char entry_SYSCALL_64_trampoline[];
1356
1357 int cpu = smp_processor_id();
1358 unsigned long SYSCALL64_entry_trampoline =
1359 (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
1360 (entry_SYSCALL_64_trampoline - _entry_trampoline);
1361
1377 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); 1362 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
1378 wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); 1363 wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
1379 1364
1380#ifdef CONFIG_IA32_EMULATION 1365#ifdef CONFIG_IA32_EMULATION
1381 wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); 1366 wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1371,7 @@ void syscall_init(void)
1386 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). 1371 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
1387 */ 1372 */
1388 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); 1373 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
1389 wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); 1374 wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
1390 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); 1375 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
1391#else 1376#else
1392 wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); 1377 wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
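
syscall_init() now points MSR_LSTAR at the per-CPU alias of the entry trampoline rather than at the kernel-text symbol: it takes the symbol's offset within the original trampoline and adds it to this CPU's entry_trampoline mapping in the cpu_entry_area. A sketch of just that address arithmetic; every address below is invented:

    #include <stdio.h>

    int main(void)
    {
        /* All addresses are invented; only the arithmetic mirrors syscall_init(). */
        unsigned long long _entry_trampoline           = 0xffffffff81c00000ULL; /* trampoline in kernel text */
        unsigned long long entry_SYSCALL_64_trampoline = _entry_trampoline + 0x40; /* symbol inside it */
        unsigned long long cea_entry_trampoline        = 0xfffffe000000b000ULL; /* this CPU's alias page */

        unsigned long long lstar = cea_entry_trampoline +
                                   (entry_SYSCALL_64_trampoline - _entry_trampoline);

        printf("offset within trampoline page: %#llx\n",
               entry_SYSCALL_64_trampoline - _entry_trampoline);
        printf("MSR_LSTAR value:               %#llx\n", lstar);
        return 0;
    }
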
@@ -1530,7 +1515,7 @@ void cpu_init(void)
1530 if (cpu) 1515 if (cpu)
1531 load_ucode_ap(); 1516 load_ucode_ap();
1532 1517
1533 t = &per_cpu(cpu_tss, cpu); 1518 t = &per_cpu(cpu_tss_rw, cpu);
1534 oist = &per_cpu(orig_ist, cpu); 1519 oist = &per_cpu(orig_ist, cpu);
1535 1520
1536#ifdef CONFIG_NUMA 1521#ifdef CONFIG_NUMA
@@ -1569,7 +1554,7 @@ void cpu_init(void)
1569 * set up and load the per-CPU TSS 1554 * set up and load the per-CPU TSS
1570 */ 1555 */
1571 if (!oist->ist[0]) { 1556 if (!oist->ist[0]) {
1572 char *estacks = per_cpu(exception_stacks, cpu); 1557 char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
1573 1558
1574 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1559 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1575 estacks += exception_stack_sizes[v]; 1560 estacks += exception_stack_sizes[v];
@@ -1580,7 +1565,7 @@ void cpu_init(void)
1580 } 1565 }
1581 } 1566 }
1582 1567
1583 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1568 t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
1584 1569
1585 /* 1570 /*
1586 * <= is required because the CPU will access up to 1571 * <= is required because the CPU will access up to
@@ -1596,11 +1581,12 @@ void cpu_init(void)
1596 enter_lazy_tlb(&init_mm, me); 1581 enter_lazy_tlb(&init_mm, me);
1597 1582
1598 /* 1583 /*
1599 * Initialize the TSS. Don't bother initializing sp0, as the initial 1584 * Initialize the TSS. sp0 points to the entry trampoline stack
1600 * task never enters user mode. 1585 * regardless of what task is running.
1601 */ 1586 */
1602 set_tss_desc(cpu, t); 1587 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1603 load_TR_desc(); 1588 load_TR_desc();
1589 load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
1604 1590
1605 load_mm_ldt(&init_mm); 1591 load_mm_ldt(&init_mm);
1606 1592
@@ -1612,7 +1598,6 @@ void cpu_init(void)
1612 if (is_uv_system()) 1598 if (is_uv_system())
1613 uv_cpu_init(); 1599 uv_cpu_init();
1614 1600
1615 setup_fixmap_gdt(cpu);
1616 load_fixmap_gdt(cpu); 1601 load_fixmap_gdt(cpu);
1617} 1602}
1618 1603
@@ -1622,7 +1607,7 @@ void cpu_init(void)
1622{ 1607{
1623 int cpu = smp_processor_id(); 1608 int cpu = smp_processor_id();
1624 struct task_struct *curr = current; 1609 struct task_struct *curr = current;
1625 struct tss_struct *t = &per_cpu(cpu_tss, cpu); 1610 struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
1626 1611
1627 wait_for_master_cpu(cpu); 1612 wait_for_master_cpu(cpu);
1628 1613
@@ -1657,12 +1642,12 @@ void cpu_init(void)
1657 * Initialize the TSS. Don't bother initializing sp0, as the initial 1642 * Initialize the TSS. Don't bother initializing sp0, as the initial
1658 * task never enters user mode. 1643 * task never enters user mode.
1659 */ 1644 */
1660 set_tss_desc(cpu, t); 1645 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1661 load_TR_desc(); 1646 load_TR_desc();
1662 1647
1663 load_mm_ldt(&init_mm); 1648 load_mm_ldt(&init_mm);
1664 1649
1665 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1650 t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
1666 1651
1667#ifdef CONFIG_DOUBLEFAULT 1652#ifdef CONFIG_DOUBLEFAULT
1668 /* Set up doublefault TSS pointer in the GDT */ 1653 /* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1659,6 @@ void cpu_init(void)
1674 1659
1675 fpu__init_cpu(); 1660 fpu__init_cpu();
1676 1661
1677 setup_fixmap_gdt(cpu);
1678 load_fixmap_gdt(cpu); 1662 load_fixmap_gdt(cpu);
1679} 1663}
1680#endif 1664#endif
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 7dbcb7adf797..8ccdca6d3f9e 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
565} 565}
566#else 566#else
567 567
568/*
569 * Flush global tlb. We only do this in x86_64 where paging has been enabled
570 * already and PGE should be enabled as well.
571 */
572static inline void flush_tlb_early(void)
573{
574 __native_flush_tlb_global_irq_disabled();
575}
576
577static inline void print_ucode(struct ucode_cpu_info *uci) 568static inline void print_ucode(struct ucode_cpu_info *uci)
578{ 569{
579 struct microcode_intel *mc; 570 struct microcode_intel *mc;
@@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
602 if (rev != mc->hdr.rev) 593 if (rev != mc->hdr.rev)
603 return -1; 594 return -1;
604 595
605#ifdef CONFIG_X86_64
606 /* Flush global tlb. This is precaution. */
607 flush_tlb_early();
608#endif
609 uci->cpu_sig.rev = rev; 596 uci->cpu_sig.rev = rev;
610 597
611 if (early) 598 if (early)
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index 0e662c55ae90..0b8cedb20d6d 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
50 cpu_relax(); 50 cpu_relax();
51} 51}
52 52
53struct tss_struct doublefault_tss __cacheline_aligned = { 53struct x86_hw_tss doublefault_tss __cacheline_aligned = {
54 .x86_tss = { 54 .sp0 = STACK_START,
55 .sp0 = STACK_START, 55 .ss0 = __KERNEL_DS,
56 .ss0 = __KERNEL_DS, 56 .ldt = 0,
57 .ldt = 0, 57 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
58 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, 58
59 59 .ip = (unsigned long) doublefault_fn,
60 .ip = (unsigned long) doublefault_fn, 60 /* 0x2 bit is always set */
61 /* 0x2 bit is always set */ 61 .flags = X86_EFLAGS_SF | 0x2,
62 .flags = X86_EFLAGS_SF | 0x2, 62 .sp = STACK_START,
63 .sp = STACK_START, 63 .es = __USER_DS,
64 .es = __USER_DS, 64 .cs = __KERNEL_CS,
65 .cs = __KERNEL_CS, 65 .ss = __KERNEL_DS,
66 .ss = __KERNEL_DS, 66 .ds = __USER_DS,
67 .ds = __USER_DS, 67 .fs = __KERNEL_PERCPU,
68 .fs = __KERNEL_PERCPU, 68
69 69 .__cr3 = __pa_nodebug(swapper_pg_dir),
70 .__cr3 = __pa_nodebug(swapper_pg_dir),
71 }
72}; 70};
73 71
74/* dummy for do_double_fault() call */ 72/* dummy for do_double_fault() call */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index f13b4c00a5de..36b17e0febe8 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,6 +18,7 @@
18#include <linux/nmi.h> 18#include <linux/nmi.h>
19#include <linux/sysfs.h> 19#include <linux/sysfs.h>
20 20
21#include <asm/cpu_entry_area.h>
21#include <asm/stacktrace.h> 22#include <asm/stacktrace.h>
22#include <asm/unwind.h> 23#include <asm/unwind.h>
23 24
@@ -43,6 +44,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
43 return true; 44 return true;
44} 45}
45 46
47bool in_entry_stack(unsigned long *stack, struct stack_info *info)
48{
49 struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
50
51 void *begin = ss;
52 void *end = ss + 1;
53
54 if ((void *)stack < begin || (void *)stack >= end)
55 return false;
56
57 info->type = STACK_TYPE_ENTRY;
58 info->begin = begin;
59 info->end = end;
60 info->next_sp = NULL;
61
62 return true;
63}
64
46static void printk_stack_address(unsigned long address, int reliable, 65static void printk_stack_address(unsigned long address, int reliable,
47 char *log_lvl) 66 char *log_lvl)
48{ 67{
@@ -50,6 +69,28 @@ static void printk_stack_address(unsigned long address, int reliable,
50 printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); 69 printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
51} 70}
52 71
72void show_iret_regs(struct pt_regs *regs)
73{
74 printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
75 printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
76 regs->sp, regs->flags);
77}
78
79static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
80{
81 if (on_stack(info, regs, sizeof(*regs)))
82 __show_regs(regs, 0);
83 else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
84 IRET_FRAME_SIZE)) {
85 /*
86 * When an interrupt or exception occurs in entry code, the
87 * full pt_regs might not have been saved yet. In that case
88 * just print the iret frame.
89 */
90 show_iret_regs(regs);
91 }
92}
93
53void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 94void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
54 unsigned long *stack, char *log_lvl) 95 unsigned long *stack, char *log_lvl)
55{ 96{
@@ -71,31 +112,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
71 * - task stack 112 * - task stack
72 * - interrupt stack 113 * - interrupt stack
73 * - HW exception stacks (double fault, nmi, debug, mce) 114 * - HW exception stacks (double fault, nmi, debug, mce)
115 * - entry stack
74 * 116 *
75 * x86-32 can have up to three stacks: 117 * x86-32 can have up to four stacks:
76 * - task stack 118 * - task stack
77 * - softirq stack 119 * - softirq stack
78 * - hardirq stack 120 * - hardirq stack
121 * - entry stack
79 */ 122 */
80 for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { 123 for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
81 const char *stack_name; 124 const char *stack_name;
82 125
83 /* 126 if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
84 * If we overflowed the task stack into a guard page, jump back 127 /*
85 * to the bottom of the usable stack. 128 * We weren't on a valid stack. It's possible that
86 */ 129 * we overflowed a valid stack into a guard page.
87 if (task_stack_page(task) - (void *)stack < PAGE_SIZE) 130 * See if the next page up is valid so that we can
88 stack = task_stack_page(task); 131 * generate some kind of backtrace if this happens.
89 132 */
90 if (get_stack_info(stack, task, &stack_info, &visit_mask)) 133 stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
91 break; 134 if (get_stack_info(stack, task, &stack_info, &visit_mask))
135 break;
136 }
92 137
93 stack_name = stack_type_name(stack_info.type); 138 stack_name = stack_type_name(stack_info.type);
94 if (stack_name) 139 if (stack_name)
95 printk("%s <%s>\n", log_lvl, stack_name); 140 printk("%s <%s>\n", log_lvl, stack_name);
96 141
97 if (regs && on_stack(&stack_info, regs, sizeof(*regs))) 142 if (regs)
98 __show_regs(regs, 0); 143 show_regs_safe(&stack_info, regs);
99 144
100 /* 145 /*
101 * Scan the stack, printing any text addresses we find. At the 146 * Scan the stack, printing any text addresses we find. At the
@@ -119,7 +164,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
119 164
120 /* 165 /*
121 * Don't print regs->ip again if it was already printed 166 * Don't print regs->ip again if it was already printed
122 * by __show_regs() below. 167 * by show_regs_safe() below.
123 */ 168 */
124 if (regs && stack == &regs->ip) 169 if (regs && stack == &regs->ip)
125 goto next; 170 goto next;
@@ -155,8 +200,8 @@ next:
155 200
156 /* if the frame has entry regs, print them */ 201 /* if the frame has entry regs, print them */
157 regs = unwind_get_entry_regs(&state); 202 regs = unwind_get_entry_regs(&state);
158 if (regs && on_stack(&stack_info, regs, sizeof(*regs))) 203 if (regs)
159 __show_regs(regs, 0); 204 show_regs_safe(&stack_info, regs);
160 } 205 }
161 206
162 if (stack_name) 207 if (stack_name)
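
The new in_entry_stack() and show_regs_safe() helpers above both reduce to the same question: does a byte range [addr, addr + len) fall entirely inside one known stack region? A minimal user-space sketch of that bounds check follows; the struct and function names are illustrative only (not kernel API), and 168 is used as a stand-in for sizeof(struct pt_regs) on x86-64.

/* Illustrative stand-in for the on_stack()/in_entry_stack() bounds check. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct stack_region {
	const char *begin;
	const char *end;		/* one past the last valid byte */
};

/* True if [addr, addr + len) lies entirely inside the region. */
static bool region_contains(const struct stack_region *r,
			    const void *addr, size_t len)
{
	const char *a = addr;

	return a >= r->begin && a + len <= r->end;
}

int main(void)
{
	char stack[4096];
	struct stack_region r = { stack, stack + sizeof(stack) };

	/* 168 bytes stands in for a full pt_regs frame on x86-64. */
	printf("full frame fits : %d\n", region_contains(&r, stack + 100, 168));
	printf("frame overflows : %d\n", region_contains(&r, stack + 4000, 168));
	return 0;
}
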
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index daefae83a3aa..04170f63e3a1 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
26 if (type == STACK_TYPE_SOFTIRQ) 26 if (type == STACK_TYPE_SOFTIRQ)
27 return "SOFTIRQ"; 27 return "SOFTIRQ";
28 28
29 if (type == STACK_TYPE_ENTRY)
30 return "ENTRY_TRAMPOLINE";
31
29 return NULL; 32 return NULL;
30} 33}
31 34
@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
93 if (task != current) 96 if (task != current)
94 goto unknown; 97 goto unknown;
95 98
99 if (in_entry_stack(stack, info))
100 goto recursion_check;
101
96 if (in_hardirq_stack(stack, info)) 102 if (in_hardirq_stack(stack, info))
97 goto recursion_check; 103 goto recursion_check;
98 104
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 88ce2ffdb110..563e28d14f2c 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -37,6 +37,15 @@ const char *stack_type_name(enum stack_type type)
37 if (type == STACK_TYPE_IRQ) 37 if (type == STACK_TYPE_IRQ)
38 return "IRQ"; 38 return "IRQ";
39 39
40 if (type == STACK_TYPE_ENTRY) {
41 /*
42 * On 64-bit, we have a generic entry stack that we
43 * use for all the kernel entry points, including
44 * SYSENTER.
45 */
46 return "ENTRY_TRAMPOLINE";
47 }
48
40 if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) 49 if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
41 return exception_stack_names[type - STACK_TYPE_EXCEPTION]; 50 return exception_stack_names[type - STACK_TYPE_EXCEPTION];
42 51
@@ -115,6 +124,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
115 if (in_irq_stack(stack, info)) 124 if (in_irq_stack(stack, info))
116 goto recursion_check; 125 goto recursion_check;
117 126
127 if (in_entry_stack(stack, info))
128 goto recursion_check;
129
118 goto unknown; 130 goto unknown;
119 131
120recursion_check: 132recursion_check:
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 3feb648781c4..2f723301eb58 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
67 * because the ->io_bitmap_max value must match the bitmap 67 * because the ->io_bitmap_max value must match the bitmap
68 * contents: 68 * contents:
69 */ 69 */
70 tss = &per_cpu(cpu_tss, get_cpu()); 70 tss = &per_cpu(cpu_tss_rw, get_cpu());
71 71
72 if (turn_on) 72 if (turn_on)
73 bitmap_clear(t->io_bitmap_ptr, from, num); 73 bitmap_clear(t->io_bitmap_ptr, from, num);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 49cfd9fe7589..68e1867cca80 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
219 /* high bit used in ret_from_ code */ 219 /* high bit used in ret_from_ code */
220 unsigned vector = ~regs->orig_ax; 220 unsigned vector = ~regs->orig_ax;
221 221
222 /*
223 * NB: Unlike exception entries, IRQ entries do not reliably
224 * handle context tracking in the low-level entry code. This is
225 * because syscall entries execute briefly with IRQs on before
226 * updating context tracking state, so we can take an IRQ from
227 * kernel mode with CONTEXT_USER. The low-level entry code only
228 * updates the context if we came from user mode, so we won't
229 * switch to CONTEXT_KERNEL. We'll fix that once the syscall
230 * code is cleaned up enough that we can cleanly defer enabling
231 * IRQs.
232 */
233
234 entering_irq(); 222 entering_irq();
235 223
236 /* entering_irq() tells RCU that we're not quiescent. Check it. */ 224 /* entering_irq() tells RCU that we're not quiescent. Check it. */
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 020efbf5786b..d86e344f5b3d 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
57 if (regs->sp >= estack_top && regs->sp <= estack_bottom) 57 if (regs->sp >= estack_top && regs->sp <= estack_bottom)
58 return; 58 return;
59 59
60 WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", 60 WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
61 current->comm, curbase, regs->sp, 61 current->comm, curbase, regs->sp,
62 irq_stack_top, irq_stack_bottom, 62 irq_stack_top, irq_stack_bottom,
63 estack_top, estack_bottom); 63 estack_top, estack_bottom, (void *)regs->ip);
64 64
65 if (sysctl_panic_on_stackoverflow) 65 if (sysctl_panic_on_stackoverflow)
66 panic("low stack detected by irq handler - check messages\n"); 66 panic("low stack detected by irq handler - check messages\n");
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 1c1eae961340..a6b5d62f45a7 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -5,6 +5,11 @@
5 * Copyright (C) 2002 Andi Kleen 5 * Copyright (C) 2002 Andi Kleen
6 * 6 *
7 * This handles calls from both 32bit and 64bit mode. 7 * This handles calls from both 32bit and 64bit mode.
8 *
9 * Lock order:
10 * context.ldt_usr_sem
11 * mmap_sem
12 * context.lock
8 */ 13 */
9 14
10#include <linux/errno.h> 15#include <linux/errno.h>
@@ -42,7 +47,7 @@ static void refresh_ldt_segments(void)
42#endif 47#endif
43} 48}
44 49
45/* context.lock is held for us, so we don't need any locking. */ 50/* context.lock is held by the task which issued the smp function call */
46static void flush_ldt(void *__mm) 51static void flush_ldt(void *__mm)
47{ 52{
48 struct mm_struct *mm = __mm; 53 struct mm_struct *mm = __mm;
@@ -99,15 +104,17 @@ static void finalize_ldt_struct(struct ldt_struct *ldt)
99 paravirt_alloc_ldt(ldt->entries, ldt->nr_entries); 104 paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
100} 105}
101 106
102/* context.lock is held */ 107static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
103static void install_ldt(struct mm_struct *current_mm,
104 struct ldt_struct *ldt)
105{ 108{
109 mutex_lock(&mm->context.lock);
110
106 /* Synchronizes with READ_ONCE in load_mm_ldt. */ 111 /* Synchronizes with READ_ONCE in load_mm_ldt. */
107 smp_store_release(&current_mm->context.ldt, ldt); 112 smp_store_release(&mm->context.ldt, ldt);
108 113
109 /* Activate the LDT for all CPUs using current_mm. */ 114 /* Activate the LDT for all CPUs using current's mm. */
110 on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); 115 on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
116
117 mutex_unlock(&mm->context.lock);
111} 118}
112 119
113static void free_ldt_struct(struct ldt_struct *ldt) 120static void free_ldt_struct(struct ldt_struct *ldt)
@@ -124,27 +131,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
124} 131}
125 132
126/* 133/*
127 * we do not have to muck with descriptors here, that is 134 * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
128 * done in switch_mm() as needed. 135 * the new task is not running, so nothing can be installed.
129 */ 136 */
130int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) 137int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
131{ 138{
132 struct ldt_struct *new_ldt; 139 struct ldt_struct *new_ldt;
133 struct mm_struct *old_mm;
134 int retval = 0; 140 int retval = 0;
135 141
136 mutex_init(&mm->context.lock); 142 if (!old_mm)
137 old_mm = current->mm;
138 if (!old_mm) {
139 mm->context.ldt = NULL;
140 return 0; 143 return 0;
141 }
142 144
143 mutex_lock(&old_mm->context.lock); 145 mutex_lock(&old_mm->context.lock);
144 if (!old_mm->context.ldt) { 146 if (!old_mm->context.ldt)
145 mm->context.ldt = NULL;
146 goto out_unlock; 147 goto out_unlock;
147 }
148 148
149 new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); 149 new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
150 if (!new_ldt) { 150 if (!new_ldt) {
@@ -180,7 +180,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
180 unsigned long entries_size; 180 unsigned long entries_size;
181 int retval; 181 int retval;
182 182
183 mutex_lock(&mm->context.lock); 183 down_read(&mm->context.ldt_usr_sem);
184 184
185 if (!mm->context.ldt) { 185 if (!mm->context.ldt) {
186 retval = 0; 186 retval = 0;
@@ -209,7 +209,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
209 retval = bytecount; 209 retval = bytecount;
210 210
211out_unlock: 211out_unlock:
212 mutex_unlock(&mm->context.lock); 212 up_read(&mm->context.ldt_usr_sem);
213 return retval; 213 return retval;
214} 214}
215 215
@@ -269,7 +269,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
269 ldt.avl = 0; 269 ldt.avl = 0;
270 } 270 }
271 271
272 mutex_lock(&mm->context.lock); 272 if (down_write_killable(&mm->context.ldt_usr_sem))
273 return -EINTR;
273 274
274 old_ldt = mm->context.ldt; 275 old_ldt = mm->context.ldt;
275 old_nr_entries = old_ldt ? old_ldt->nr_entries : 0; 276 old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
@@ -291,7 +292,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
291 error = 0; 292 error = 0;
292 293
293out_unlock: 294out_unlock:
294 mutex_unlock(&mm->context.lock); 295 up_write(&mm->context.ldt_usr_sem);
295out: 296out:
296 return error; 297 return error;
297} 298}
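
The locking change above splits the old context.lock into two pieces: read_ldt() and write_ldt() now serialize on the ldt_usr_sem read/write semaphore, while install_ldt() still takes context.lock around the cross-CPU flush. As a rough user-space analogy (not kernel code), the reader/writer pattern looks like this, with pthread_rwlock standing in for the rwsem:

/* User-space analogy for the ldt_usr_sem reader/writer pattern. */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t ldt_usr_sem = PTHREAD_RWLOCK_INITIALIZER;
static int ldt_nr_entries;

static int read_ldt_size(void)
{
	int n;

	pthread_rwlock_rdlock(&ldt_usr_sem);	/* like down_read() */
	n = ldt_nr_entries;
	pthread_rwlock_unlock(&ldt_usr_sem);	/* like up_read() */
	return n;
}

static void write_ldt_size(int n)
{
	pthread_rwlock_wrlock(&ldt_usr_sem);	/* like down_write_killable() */
	ldt_nr_entries = n;
	pthread_rwlock_unlock(&ldt_usr_sem);	/* like up_write() */
}

int main(void)
{
	write_ldt_size(16);
	printf("entries: %d\n", read_ldt_size());
	return 0;
}
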
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index ac0be8283325..9edadabf04f6 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
10DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); 10DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
11DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); 11DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
12DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); 12DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
13DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
14DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); 13DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
15 14
16DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); 15DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
60 PATCH_SITE(pv_mmu_ops, read_cr2); 59 PATCH_SITE(pv_mmu_ops, read_cr2);
61 PATCH_SITE(pv_mmu_ops, read_cr3); 60 PATCH_SITE(pv_mmu_ops, read_cr3);
62 PATCH_SITE(pv_mmu_ops, write_cr3); 61 PATCH_SITE(pv_mmu_ops, write_cr3);
63 PATCH_SITE(pv_mmu_ops, flush_tlb_single);
64 PATCH_SITE(pv_cpu_ops, wbinvd); 62 PATCH_SITE(pv_cpu_ops, wbinvd);
65#if defined(CONFIG_PARAVIRT_SPINLOCKS) 63#if defined(CONFIG_PARAVIRT_SPINLOCKS)
66 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): 64 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index bb988a24db92..aed9d94bd46f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -47,7 +47,7 @@
47 * section. Since TSS's are completely CPU-local, we want them 47 * section. Since TSS's are completely CPU-local, we want them
48 * on exact cacheline boundaries, to eliminate cacheline ping-pong. 48 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
49 */ 49 */
50__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { 50__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
51 .x86_tss = { 51 .x86_tss = {
52 /* 52 /*
53 * .sp0 is only used when entering ring 0 from a lower 53 * .sp0 is only used when entering ring 0 from a lower
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
56 * Poison it. 56 * Poison it.
57 */ 57 */
58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, 58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
59
60#ifdef CONFIG_X86_64
61 /*
62 * .sp1 is cpu_current_top_of_stack. The init task never
63 * runs user code, but cpu_current_top_of_stack should still
64 * be well defined before the first context switch.
65 */
66 .sp1 = TOP_OF_INIT_STACK,
67#endif
68
59#ifdef CONFIG_X86_32 69#ifdef CONFIG_X86_32
60 .ss0 = __KERNEL_DS, 70 .ss0 = __KERNEL_DS,
61 .ss1 = __KERNEL_CS, 71 .ss1 = __KERNEL_CS,
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
71 */ 81 */
72 .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, 82 .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
73#endif 83#endif
74#ifdef CONFIG_X86_32
75 .SYSENTER_stack_canary = STACK_END_MAGIC,
76#endif
77}; 84};
78EXPORT_PER_CPU_SYMBOL(cpu_tss); 85EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
79 86
80DEFINE_PER_CPU(bool, __tss_limit_invalid); 87DEFINE_PER_CPU(bool, __tss_limit_invalid);
81EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); 88EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
104 struct fpu *fpu = &t->fpu; 111 struct fpu *fpu = &t->fpu;
105 112
106 if (bp) { 113 if (bp) {
107 struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); 114 struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
108 115
109 t->io_bitmap_ptr = NULL; 116 t->io_bitmap_ptr = NULL;
110 clear_thread_flag(TIF_IO_BITMAP); 117 clear_thread_flag(TIF_IO_BITMAP);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 45bf0c5f93e1..5224c6099184 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
234 struct fpu *prev_fpu = &prev->fpu; 234 struct fpu *prev_fpu = &prev->fpu;
235 struct fpu *next_fpu = &next->fpu; 235 struct fpu *next_fpu = &next->fpu;
236 int cpu = smp_processor_id(); 236 int cpu = smp_processor_id();
237 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 237 struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
238 238
239 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ 239 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
240 240
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eeeb34f85c25..c75466232016 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
69 unsigned int fsindex, gsindex; 69 unsigned int fsindex, gsindex;
70 unsigned int ds, cs, es; 70 unsigned int ds, cs, es;
71 71
72 printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip); 72 show_iret_regs(regs);
73 printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss, 73
74 regs->sp, regs->flags);
75 if (regs->orig_ax != -1) 74 if (regs->orig_ax != -1)
76 pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); 75 pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
77 else 76 else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
88 printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", 87 printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
89 regs->r13, regs->r14, regs->r15); 88 regs->r13, regs->r14, regs->r15);
90 89
90 if (!all)
91 return;
92
91 asm("movl %%ds,%0" : "=r" (ds)); 93 asm("movl %%ds,%0" : "=r" (ds));
92 asm("movl %%cs,%0" : "=r" (cs)); 94 asm("movl %%cs,%0" : "=r" (cs));
93 asm("movl %%es,%0" : "=r" (es)); 95 asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
98 rdmsrl(MSR_GS_BASE, gs); 100 rdmsrl(MSR_GS_BASE, gs);
99 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 101 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
100 102
101 if (!all)
102 return;
103
104 cr0 = read_cr0(); 103 cr0 = read_cr0();
105 cr2 = read_cr2(); 104 cr2 = read_cr2();
106 cr3 = __read_cr3(); 105 cr3 = __read_cr3();
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
400 struct fpu *prev_fpu = &prev->fpu; 399 struct fpu *prev_fpu = &prev->fpu;
401 struct fpu *next_fpu = &next->fpu; 400 struct fpu *next_fpu = &next->fpu;
402 int cpu = smp_processor_id(); 401 int cpu = smp_processor_id();
403 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 402 struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
404 403
405 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && 404 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
406 this_cpu_read(irq_count) != -1); 405 this_cpu_read(irq_count) != -1);
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
462 * Switch the PDA and FPU contexts. 461 * Switch the PDA and FPU contexts.
463 */ 462 */
464 this_cpu_write(current_task, next_p); 463 this_cpu_write(current_task, next_p);
464 this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
465 465
466 /* Reload sp0. */ 466 /* Reload sp0. */
467 update_sp0(next_p); 467 update_sp0(next_p);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 35cb20994e32..c5970efa8557 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -932,12 +932,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
932 initial_code = (unsigned long)start_secondary; 932 initial_code = (unsigned long)start_secondary;
933 initial_stack = idle->thread.sp; 933 initial_stack = idle->thread.sp;
934 934
935 /* 935 /* Enable the espfix hack for this CPU */
936 * Enable the espfix hack for this CPU
937 */
938#ifdef CONFIG_X86_ESPFIX64
939 init_espfix_ap(cpu); 936 init_espfix_ap(cpu);
940#endif
941 937
942 /* So we see what's up */ 938 /* So we see what's up */
943 announce_cpu(cpu, apicid); 939 announce_cpu(cpu, apicid);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 989514c94a55..f69dbd47d733 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
51#include <asm/traps.h> 51#include <asm/traps.h>
52#include <asm/desc.h> 52#include <asm/desc.h>
53#include <asm/fpu/internal.h> 53#include <asm/fpu/internal.h>
54#include <asm/cpu_entry_area.h>
54#include <asm/mce.h> 55#include <asm/mce.h>
55#include <asm/fixmap.h> 56#include <asm/fixmap.h>
56#include <asm/mach_traps.h> 57#include <asm/mach_traps.h>
@@ -348,9 +349,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
348 349
349 /* 350 /*
350 * If IRET takes a non-IST fault on the espfix64 stack, then we 351 * If IRET takes a non-IST fault on the espfix64 stack, then we
351 * end up promoting it to a doublefault. In that case, modify 352 * end up promoting it to a doublefault. In that case, take
352 * the stack to make it look like we just entered the #GP 353 * advantage of the fact that we're not using the normal (TSS.sp0)
353 * handler from user space, similar to bad_iret. 354 * stack right now. We can write a fake #GP(0) frame at TSS.sp0
355 * and then modify our own IRET frame so that, when we return,
356 * we land directly at the #GP(0) vector with the stack already
357 * set up according to its expectations.
358 *
359 * The net result is that our #GP handler will think that we
360 * entered from usermode with the bad user context.
354 * 361 *
355 * No need for ist_enter here because we don't use RCU. 362 * No need for ist_enter here because we don't use RCU.
356 */ 363 */
@@ -358,13 +365,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
358 regs->cs == __KERNEL_CS && 365 regs->cs == __KERNEL_CS &&
359 regs->ip == (unsigned long)native_irq_return_iret) 366 regs->ip == (unsigned long)native_irq_return_iret)
360 { 367 {
361 struct pt_regs *normal_regs = task_pt_regs(current); 368 struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
362 369
363 /* Fake a #GP(0) from userspace. */ 370 /*
364 memmove(&normal_regs->ip, (void *)regs->sp, 5*8); 371 * regs->sp points to the failing IRET frame on the
365 normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ 372 * ESPFIX64 stack. Copy it to the entry stack. This fills
373 * in gpregs->ss through gpregs->ip.
374 *
375 */
376 memmove(&gpregs->ip, (void *)regs->sp, 5*8);
377 gpregs->orig_ax = 0; /* Missing (lost) #GP error code */
378
379 /*
380 * Adjust our frame so that we return straight to the #GP
381 * vector with the expected RSP value. This is safe because
382 * we won't enable interrupts or schedule before we invoke
383 * general_protection, so nothing will clobber the stack
384 * frame we just set up.
385 */
366 regs->ip = (unsigned long)general_protection; 386 regs->ip = (unsigned long)general_protection;
367 regs->sp = (unsigned long)&normal_regs->orig_ax; 387 regs->sp = (unsigned long)&gpregs->orig_ax;
368 388
369 return; 389 return;
370 } 390 }
@@ -389,7 +409,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
389 * 409 *
390 * Processors update CR2 whenever a page fault is detected. If a 410 * Processors update CR2 whenever a page fault is detected. If a
391 * second page fault occurs while an earlier page fault is being 411 * second page fault occurs while an earlier page fault is being
392 * deliv- ered, the faulting linear address of the second fault will 412 * delivered, the faulting linear address of the second fault will
393 * overwrite the contents of CR2 (replacing the previous 413 * overwrite the contents of CR2 (replacing the previous
394 * address). These updates to CR2 occur even if the page fault 414 * address). These updates to CR2 occur even if the page fault
395 * results in a double fault or occurs during the delivery of a 415 * results in a double fault or occurs during the delivery of a
@@ -605,14 +625,15 @@ NOKPROBE_SYMBOL(do_int3);
605 625
606#ifdef CONFIG_X86_64 626#ifdef CONFIG_X86_64
607/* 627/*
608 * Help handler running on IST stack to switch off the IST stack if the 628 * Help handler running on a per-cpu (IST or entry trampoline) stack
609 * interrupted code was in user mode. The actual stack switch is done in 629 * to switch to the normal thread stack if the interrupted code was in
610 * entry_64.S 630 * user mode. The actual stack switch is done in entry_64.S
611 */ 631 */
612asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) 632asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
613{ 633{
614 struct pt_regs *regs = task_pt_regs(current); 634 struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
615 *regs = *eregs; 635 if (regs != eregs)
636 *regs = *eregs;
616 return regs; 637 return regs;
617} 638}
618NOKPROBE_SYMBOL(sync_regs); 639NOKPROBE_SYMBOL(sync_regs);
@@ -628,13 +649,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
628 /* 649 /*
629 * This is called from entry_64.S early in handling a fault 650 * This is called from entry_64.S early in handling a fault
630 * caused by a bad iret to user mode. To handle the fault 651 * caused by a bad iret to user mode. To handle the fault
631 * correctly, we want move our stack frame to task_pt_regs 652 * correctly, we want to move our stack frame to where it would
632 * and we want to pretend that the exception came from the 653 * be had we entered directly on the entry stack (rather than
633 * iret target. 654 * just below the IRET frame) and we want to pretend that the
655 * exception came from the IRET target.
634 */ 656 */
635 struct bad_iret_stack *new_stack = 657 struct bad_iret_stack *new_stack =
636 container_of(task_pt_regs(current), 658 (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
637 struct bad_iret_stack, regs);
638 659
639 /* Copy the IRET target to the new stack. */ 660 /* Copy the IRET target to the new stack. */
640 memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); 661 memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
@@ -795,14 +816,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
795 debug_stack_usage_dec(); 816 debug_stack_usage_dec();
796 817
797exit: 818exit:
798#if defined(CONFIG_X86_32)
799 /*
800 * This is the most likely code path that involves non-trivial use
801 * of the SYSENTER stack. Check that we haven't overrun it.
802 */
803 WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
804 "Overran or corrupted SYSENTER stack\n");
805#endif
806 ist_exit(regs); 819 ist_exit(regs);
807} 820}
808NOKPROBE_SYMBOL(do_debug); 821NOKPROBE_SYMBOL(do_debug);
@@ -929,6 +942,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
929 942
930void __init trap_init(void) 943void __init trap_init(void)
931{ 944{
945 /* Init cpu_entry_area before IST entries are set up */
946 setup_cpu_entry_areas();
947
932 idt_setup_traps(); 948 idt_setup_traps();
933 949
934 /* 950 /*
@@ -936,8 +952,9 @@ void __init trap_init(void)
936 * "sidt" instruction will not leak the location of the kernel, and 952 * "sidt" instruction will not leak the location of the kernel, and
937 * to defend the IDT against arbitrary memory write vulnerabilities. 953 * to defend the IDT against arbitrary memory write vulnerabilities.
938 * It will be reloaded in cpu_init() */ 954 * It will be reloaded in cpu_init() */
939 __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); 955 cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
940 idt_descr.address = fix_to_virt(FIX_RO_IDT); 956 PAGE_KERNEL_RO);
957 idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
941 958
942 /* 959 /*
943 * Should be a barrier for any external CPU state: 960 * Should be a barrier for any external CPU state:
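
Both do_double_fault() and fixup_bad_iret() above carve out a register frame just below the top of the entry stack with the expression "(struct pt_regs *)sp0 - 1". A small sketch of that pointer arithmetic, using an abbreviated, illustrative pt_regs layout rather than the real one:

/* Sketch of "(struct pt_regs *)sp0 - 1": reserve one frame below the top. */
#include <stdio.h>

struct fake_pt_regs {
	unsigned long r15, r14, r13, r12, bp, bx;
	unsigned long r11, r10, r9, r8, ax, cx, dx, si, di;
	unsigned long orig_ax;
	unsigned long ip, cs, flags, sp, ss;	/* hardware IRET frame */
};

int main(void)
{
	unsigned long stack[512];
	void *sp0 = &stack[512];		/* top of the entry stack */
	struct fake_pt_regs *gpregs = (struct fake_pt_regs *)sp0 - 1;

	printf("frame size  : %zu bytes\n", sizeof(*gpregs));
	printf("frame bottom: %p\n", (void *)gpregs);
	printf("frame top   : %p (== sp0 %p)\n", (void *)(gpregs + 1), sp0);
	return 0;
}
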
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index a3f973b2c97a..be86a865087a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
253 return NULL; 253 return NULL;
254} 254}
255 255
256static bool stack_access_ok(struct unwind_state *state, unsigned long addr, 256static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
257 size_t len) 257 size_t len)
258{ 258{
259 struct stack_info *info = &state->stack_info; 259 struct stack_info *info = &state->stack_info;
260 void *addr = (void *)_addr;
260 261
261 /* 262 if (!on_stack(info, addr, len) &&
262 * If the address isn't on the current stack, switch to the next one. 263 (get_stack_info(addr, state->task, info, &state->stack_mask)))
263 * 264 return false;
264 * We may have to traverse multiple stacks to deal with the possibility
265 * that info->next_sp could point to an empty stack and the address
266 * could be on a subsequent stack.
267 */
268 while (!on_stack(info, (void *)addr, len))
269 if (get_stack_info(info->next_sp, state->task, info,
270 &state->stack_mask))
271 return false;
272 265
273 return true; 266 return true;
274} 267}
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
283 return true; 276 return true;
284} 277}
285 278
286#define REGS_SIZE (sizeof(struct pt_regs))
287#define SP_OFFSET (offsetof(struct pt_regs, sp))
288#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
289#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
290
291static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, 279static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
292 unsigned long *ip, unsigned long *sp, bool full) 280 unsigned long *ip, unsigned long *sp)
293{ 281{
294 size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE; 282 struct pt_regs *regs = (struct pt_regs *)addr;
295 size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
296 struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
297
298 if (IS_ENABLED(CONFIG_X86_64)) {
299 if (!stack_access_ok(state, addr, regs_size))
300 return false;
301 283
302 *ip = regs->ip; 284 /* x86-32 support will be more complicated due to the &regs->sp hack */
303 *sp = regs->sp; 285 BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
304 286
305 return true; 287 if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
306 }
307
308 if (!stack_access_ok(state, addr, sp_offset))
309 return false; 288 return false;
310 289
311 *ip = regs->ip; 290 *ip = regs->ip;
291 *sp = regs->sp;
292 return true;
293}
312 294
313 if (user_mode(regs)) { 295static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
314 if (!stack_access_ok(state, addr + sp_offset, 296 unsigned long *ip, unsigned long *sp)
315 REGS_SIZE - SP_OFFSET)) 297{
316 return false; 298 struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
317 299
318 *sp = regs->sp; 300 if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
319 } else 301 return false;
320 *sp = (unsigned long)&regs->sp;
321 302
303 *ip = regs->ip;
304 *sp = regs->sp;
322 return true; 305 return true;
323} 306}
324 307
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
327 unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; 310 unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
328 enum stack_type prev_type = state->stack_info.type; 311 enum stack_type prev_type = state->stack_info.type;
329 struct orc_entry *orc; 312 struct orc_entry *orc;
330 struct pt_regs *ptregs;
331 bool indirect = false; 313 bool indirect = false;
332 314
333 if (unwind_done(state)) 315 if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
435 break; 417 break;
436 418
437 case ORC_TYPE_REGS: 419 case ORC_TYPE_REGS:
438 if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { 420 if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
439 orc_warn("can't dereference registers at %p for ip %pB\n", 421 orc_warn("can't dereference registers at %p for ip %pB\n",
440 (void *)sp, (void *)orig_ip); 422 (void *)sp, (void *)orig_ip);
441 goto done; 423 goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
447 break; 429 break;
448 430
449 case ORC_TYPE_REGS_IRET: 431 case ORC_TYPE_REGS_IRET:
450 if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { 432 if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
451 orc_warn("can't dereference iret registers at %p for ip %pB\n", 433 orc_warn("can't dereference iret registers at %p for ip %pB\n",
452 (void *)sp, (void *)orig_ip); 434 (void *)sp, (void *)orig_ip);
453 goto done; 435 goto done;
454 } 436 }
455 437
456 ptregs = container_of((void *)sp, struct pt_regs, ip); 438 state->regs = (void *)sp - IRET_FRAME_OFFSET;
457 if ((unsigned long)ptregs >= prev_sp && 439 state->full_regs = false;
458 on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
459 state->regs = ptregs;
460 state->full_regs = false;
461 } else
462 state->regs = NULL;
463
464 state->signal = true; 440 state->signal = true;
465 break; 441 break;
466 442
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
553 } 529 }
554 530
555 if (get_stack_info((unsigned long *)state->sp, state->task, 531 if (get_stack_info((unsigned long *)state->sp, state->task,
556 &state->stack_info, &state->stack_mask)) 532 &state->stack_info, &state->stack_mask)) {
557 return; 533 /*
534 * We weren't on a valid stack. It's possible that
535 * we overflowed a valid stack into a guard page.
536 * See if the next page up is valid so that we can
537 * generate some kind of backtrace if this happens.
538 */
539 void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
540 if (get_stack_info(next_page, state->task, &state->stack_info,
541 &state->stack_mask))
542 return;
543 }
558 544
559 /* 545 /*
560 * The caller can provide the address of the first frame directly 546 * The caller can provide the address of the first frame directly
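
deref_stack_iret_regs() above relies on the hardware IRET frame being the tail of pt_regs, so that backing up by IRET_FRAME_OFFSET from the frame's address yields a usable (if partial) pt_regs pointer. A sketch of that relationship with a deliberately abbreviated, illustrative layout:

/* Sketch of IRET_FRAME_OFFSET/IRET_FRAME_SIZE over a fake pt_regs. */
#include <stddef.h>
#include <stdio.h>

struct fake_pt_regs {
	unsigned long r15, r14, r13, r12, bp, bx;
	unsigned long r11, r10, r9, r8, ax, cx, dx, si, di;
	unsigned long orig_ax;
	unsigned long ip, cs, flags, sp, ss;	/* hardware IRET frame */
};

#define IRET_FRAME_OFFSET	offsetof(struct fake_pt_regs, ip)
#define IRET_FRAME_SIZE		(sizeof(struct fake_pt_regs) - IRET_FRAME_OFFSET)

int main(void)
{
	unsigned long buf[32] = { 0 };
	/* Pretend these five words are a bare IRET frame on some stack. */
	unsigned long *iret_words = &buf[20];
	struct fake_pt_regs *regs;

	iret_words[0] = 0x1111;	/* ip */
	iret_words[1] = 0x10;	/* cs */
	iret_words[2] = 0x2;	/* flags */
	iret_words[3] = 0x2222;	/* sp */
	iret_words[4] = 0x18;	/* ss */

	/* Back up to where the full pt_regs would start. */
	regs = (void *)((char *)iret_words - IRET_FRAME_OFFSET);

	printf("iret frame: %zu bytes at offset %zu\n",
	       IRET_FRAME_SIZE, IRET_FRAME_OFFSET);
	printf("ip=%#lx sp=%#lx\n", regs->ip, regs->sp);
	return 0;
}
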
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a4009fb9be87..d2a8b5a24a44 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -107,6 +107,15 @@ SECTIONS
107 SOFTIRQENTRY_TEXT 107 SOFTIRQENTRY_TEXT
108 *(.fixup) 108 *(.fixup)
109 *(.gnu.warning) 109 *(.gnu.warning)
110
111#ifdef CONFIG_X86_64
112 . = ALIGN(PAGE_SIZE);
113 _entry_trampoline = .;
114 *(.entry_trampoline)
115 . = ALIGN(PAGE_SIZE);
116 ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
117#endif
118
110 /* End of text section */ 119 /* End of text section */
111 _etext = .; 120 _etext = .;
112 } :text = 0x9090 121 } :text = 0x9090
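
The linker-script ASSERT above enforces that the padded trampoline section is exactly one page, because setup_cpu_entry_area() maps exactly one read-execute page of it per CPU. A sketch of the same page-rounding check, with placeholder sizes:

/* Sketch of the "trampoline must fit one page" constraint. */
#include <stdio.h>

#define PAGE_SIZE	4096UL

static unsigned long page_align(unsigned long x)
{
	return (x + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
}

int main(void)
{
	unsigned long trampoline_bytes = 1234;	/* pretend code size */
	unsigned long section = page_align(trampoline_bytes);

	printf("padded section: %lu bytes, fits one page: %d\n",
	       section, section == PAGE_SIZE);
	return 0;
}
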
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index abe74f779f9d..b514b2b2845a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2390,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
2390} 2390}
2391 2391
2392static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, 2392static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2393 u64 cr0, u64 cr4) 2393 u64 cr0, u64 cr3, u64 cr4)
2394{ 2394{
2395 int bad; 2395 int bad;
2396 u64 pcid;
2397
2398 /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
2399 pcid = 0;
2400 if (cr4 & X86_CR4_PCIDE) {
2401 pcid = cr3 & 0xfff;
2402 cr3 &= ~0xfff;
2403 }
2404
2405 bad = ctxt->ops->set_cr(ctxt, 3, cr3);
2406 if (bad)
2407 return X86EMUL_UNHANDLEABLE;
2396 2408
2397 /* 2409 /*
2398 * First enable PAE, long mode needs it before CR0.PG = 1 is set. 2410 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2411,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2411 bad = ctxt->ops->set_cr(ctxt, 4, cr4); 2423 bad = ctxt->ops->set_cr(ctxt, 4, cr4);
2412 if (bad) 2424 if (bad)
2413 return X86EMUL_UNHANDLEABLE; 2425 return X86EMUL_UNHANDLEABLE;
2426 if (pcid) {
2427 bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
2428 if (bad)
2429 return X86EMUL_UNHANDLEABLE;
2430 }
2431
2414 } 2432 }
2415 2433
2416 return X86EMUL_CONTINUE; 2434 return X86EMUL_CONTINUE;
@@ -2421,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2421 struct desc_struct desc; 2439 struct desc_struct desc;
2422 struct desc_ptr dt; 2440 struct desc_ptr dt;
2423 u16 selector; 2441 u16 selector;
2424 u32 val, cr0, cr4; 2442 u32 val, cr0, cr3, cr4;
2425 int i; 2443 int i;
2426 2444
2427 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); 2445 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
2428 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8)); 2446 cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
2429 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; 2447 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
2430 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); 2448 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
2431 2449
@@ -2467,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2467 2485
2468 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); 2486 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
2469 2487
2470 return rsm_enter_protected_mode(ctxt, cr0, cr4); 2488 return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2471} 2489}
2472 2490
2473static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) 2491static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2474{ 2492{
2475 struct desc_struct desc; 2493 struct desc_struct desc;
2476 struct desc_ptr dt; 2494 struct desc_ptr dt;
2477 u64 val, cr0, cr4; 2495 u64 val, cr0, cr3, cr4;
2478 u32 base3; 2496 u32 base3;
2479 u16 selector; 2497 u16 selector;
2480 int i, r; 2498 int i, r;
@@ -2491,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2491 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); 2509 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2492 2510
2493 cr0 = GET_SMSTATE(u64, smbase, 0x7f58); 2511 cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
2494 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50)); 2512 cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
2495 cr4 = GET_SMSTATE(u64, smbase, 0x7f48); 2513 cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
2496 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); 2514 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
2497 val = GET_SMSTATE(u64, smbase, 0x7ed0); 2515 val = GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2519,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2519 dt.address = GET_SMSTATE(u64, smbase, 0x7e68); 2537 dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
2520 ctxt->ops->set_gdt(ctxt, &dt); 2538 ctxt->ops->set_gdt(ctxt, &dt);
2521 2539
2522 r = rsm_enter_protected_mode(ctxt, cr0, cr4); 2540 r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2523 if (r != X86EMUL_CONTINUE) 2541 if (r != X86EMUL_CONTINUE)
2524 return r; 2542 return r;
2525 2543
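
The SMM-exit change above works around the architectural rule that CR4.PCIDE may only be set while CR3[11:0] is zero: the PCID bits are stripped from CR3, PCIDE is enabled, and the PCID is written back afterwards. A sketch of that masking with made-up values:

/* Sketch of the CR3/PCID split done by rsm_enter_protected_mode(). */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t cr3 = 0x12345007;	/* page-table base | PCID 7 */
	uint64_t pcid = cr3 & 0xfff;	/* low 12 bits are the PCID */

	cr3 &= ~0xfffULL;		/* load CR3 without the PCID first */
	printf("CR3 while enabling PCIDE: %#" PRIx64 "\n", cr3);
	printf("CR3 restored afterwards : %#" PRIx64 "\n", cr3 | pcid);
	return 0;
}
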
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e5e66e5c6640..c4deb1f34faa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
3395 spin_lock(&vcpu->kvm->mmu_lock); 3395 spin_lock(&vcpu->kvm->mmu_lock);
3396 if(make_mmu_pages_available(vcpu) < 0) { 3396 if(make_mmu_pages_available(vcpu) < 0) {
3397 spin_unlock(&vcpu->kvm->mmu_lock); 3397 spin_unlock(&vcpu->kvm->mmu_lock);
3398 return 1; 3398 return -ENOSPC;
3399 } 3399 }
3400 sp = kvm_mmu_get_page(vcpu, 0, 0, 3400 sp = kvm_mmu_get_page(vcpu, 0, 0,
3401 vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL); 3401 vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
3410 spin_lock(&vcpu->kvm->mmu_lock); 3410 spin_lock(&vcpu->kvm->mmu_lock);
3411 if (make_mmu_pages_available(vcpu) < 0) { 3411 if (make_mmu_pages_available(vcpu) < 0) {
3412 spin_unlock(&vcpu->kvm->mmu_lock); 3412 spin_unlock(&vcpu->kvm->mmu_lock);
3413 return 1; 3413 return -ENOSPC;
3414 } 3414 }
3415 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), 3415 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
3416 i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); 3416 i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3450 spin_lock(&vcpu->kvm->mmu_lock); 3450 spin_lock(&vcpu->kvm->mmu_lock);
3451 if (make_mmu_pages_available(vcpu) < 0) { 3451 if (make_mmu_pages_available(vcpu) < 0) {
3452 spin_unlock(&vcpu->kvm->mmu_lock); 3452 spin_unlock(&vcpu->kvm->mmu_lock);
3453 return 1; 3453 return -ENOSPC;
3454 } 3454 }
3455 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 3455 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
3456 vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL); 3456 vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3487 spin_lock(&vcpu->kvm->mmu_lock); 3487 spin_lock(&vcpu->kvm->mmu_lock);
3488 if (make_mmu_pages_available(vcpu) < 0) { 3488 if (make_mmu_pages_available(vcpu) < 0) {
3489 spin_unlock(&vcpu->kvm->mmu_lock); 3489 spin_unlock(&vcpu->kvm->mmu_lock);
3490 return 1; 3490 return -ENOSPC;
3491 } 3491 }
3492 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 3492 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
3493 0, ACC_ALL); 3493 0, ACC_ALL);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8eba631c4dbd..023afa0c8887 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2302,7 +2302,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2302 * processors. See 22.2.4. 2302 * processors. See 22.2.4.
2303 */ 2303 */
2304 vmcs_writel(HOST_TR_BASE, 2304 vmcs_writel(HOST_TR_BASE,
2305 (unsigned long)this_cpu_ptr(&cpu_tss)); 2305 (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
2306 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ 2306 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
2307 2307
2308 /* 2308 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index faf843c9b916..1cec2c62a0b0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4384,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
4384 addr, n, v)) 4384 addr, n, v))
4385 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) 4385 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
4386 break; 4386 break;
4387 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); 4387 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
4388 handled += n; 4388 handled += n;
4389 addr += n; 4389 addr += n;
4390 len -= n; 4390 len -= n;
@@ -4643,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
4643{ 4643{
4644 if (vcpu->mmio_read_completed) { 4644 if (vcpu->mmio_read_completed) {
4645 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, 4645 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4646 vcpu->mmio_fragments[0].gpa, *(u64 *)val); 4646 vcpu->mmio_fragments[0].gpa, val);
4647 vcpu->mmio_read_completed = 0; 4647 vcpu->mmio_read_completed = 0;
4648 return 1; 4648 return 1;
4649 } 4649 }
@@ -4665,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4665 4665
4666static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) 4666static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
4667{ 4667{
4668 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); 4668 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
4669 return vcpu_mmio_write(vcpu, gpa, bytes, val); 4669 return vcpu_mmio_write(vcpu, gpa, bytes, val);
4670} 4670}
4671 4671
4672static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, 4672static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4673 void *val, int bytes) 4673 void *val, int bytes)
4674{ 4674{
4675 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); 4675 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
4676 return X86EMUL_IO_NEEDED; 4676 return X86EMUL_IO_NEEDED;
4677} 4677}
4678 4678
@@ -7264,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
7264 7264
7265int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 7265int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7266{ 7266{
7267 struct fpu *fpu = &current->thread.fpu;
7268 int r; 7267 int r;
7269 7268
7270 fpu__initialize(fpu);
7271
7272 kvm_sigset_activate(vcpu); 7269 kvm_sigset_activate(vcpu);
7273 7270
7271 kvm_load_guest_fpu(vcpu);
7272
7274 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { 7273 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
7275 if (kvm_run->immediate_exit) { 7274 if (kvm_run->immediate_exit) {
7276 r = -EINTR; 7275 r = -EINTR;
@@ -7296,14 +7295,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7296 } 7295 }
7297 } 7296 }
7298 7297
7299 kvm_load_guest_fpu(vcpu);
7300
7301 if (unlikely(vcpu->arch.complete_userspace_io)) { 7298 if (unlikely(vcpu->arch.complete_userspace_io)) {
7302 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; 7299 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
7303 vcpu->arch.complete_userspace_io = NULL; 7300 vcpu->arch.complete_userspace_io = NULL;
7304 r = cui(vcpu); 7301 r = cui(vcpu);
7305 if (r <= 0) 7302 if (r <= 0)
7306 goto out_fpu; 7303 goto out;
7307 } else 7304 } else
7308 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); 7305 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
7309 7306
@@ -7312,9 +7309,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7312 else 7309 else
7313 r = vcpu_run(vcpu); 7310 r = vcpu_run(vcpu);
7314 7311
7315out_fpu:
7316 kvm_put_guest_fpu(vcpu);
7317out: 7312out:
7313 kvm_put_guest_fpu(vcpu);
7318 post_kvm_run_save(vcpu); 7314 post_kvm_run_save(vcpu);
7319 kvm_sigset_deactivate(vcpu); 7315 kvm_sigset_deactivate(vcpu);
7320 7316
@@ -7384,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
7384#endif 7380#endif
7385 7381
7386 kvm_rip_write(vcpu, regs->rip); 7382 kvm_rip_write(vcpu, regs->rip);
7387 kvm_set_rflags(vcpu, regs->rflags); 7383 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
7388 7384
7389 vcpu->arch.exception.pending = false; 7385 vcpu->arch.exception.pending = false;
7390 7386
@@ -7498,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
7498} 7494}
7499EXPORT_SYMBOL_GPL(kvm_task_switch); 7495EXPORT_SYMBOL_GPL(kvm_task_switch);
7500 7496
7497int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
7498{
7499 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) {
7500 /*
7501 * When EFER.LME and CR0.PG are set, the processor is in
7502 * 64-bit mode (though maybe in a 32-bit code segment).
7503 * CR4.PAE and EFER.LMA must be set.
7504 */
7505 if (!(sregs->cr4 & X86_CR4_PAE_BIT)
7506 || !(sregs->efer & EFER_LMA))
7507 return -EINVAL;
7508 } else {
7509 /*
7510 * Not in 64-bit mode: EFER.LMA is clear and the code
7511 * segment cannot be 64-bit.
7512 */
7513 if (sregs->efer & EFER_LMA || sregs->cs.l)
7514 return -EINVAL;
7515 }
7516
7517 return 0;
7518}
7519
7501int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 7520int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
7502 struct kvm_sregs *sregs) 7521 struct kvm_sregs *sregs)
7503{ 7522{
@@ -7510,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
7510 (sregs->cr4 & X86_CR4_OSXSAVE)) 7529 (sregs->cr4 & X86_CR4_OSXSAVE))
7511 return -EINVAL; 7530 return -EINVAL;
7512 7531
7532 if (kvm_valid_sregs(vcpu, sregs))
7533 return -EINVAL;
7534
7513 apic_base_msr.data = sregs->apic_base; 7535 apic_base_msr.data = sregs->apic_base;
7514 apic_base_msr.host_initiated = true; 7536 apic_base_msr.host_initiated = true;
7515 if (kvm_set_apic_base(vcpu, &apic_base_msr)) 7537 if (kvm_set_apic_base(vcpu, &apic_base_msr))
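
kvm_valid_sregs() above rejects register state the hardware could never reach: EFER.LME together with CR0.PG implies 64-bit mode, which requires CR4.PAE and EFER.LMA; otherwise LMA and a long-mode code segment must be clear. A user-space sketch of the same rule (bit positions are the architectural ones; the helper name is illustrative):

/* Sketch of the EFER/CR0/CR4 consistency rule. */
#include <stdbool.h>
#include <stdio.h>

#define EFER_LME	(1ULL << 8)
#define EFER_LMA	(1ULL << 10)
#define CR0_PG		(1ULL << 31)
#define CR4_PAE		(1ULL << 5)

static bool sregs_valid(unsigned long long efer, unsigned long long cr0,
			unsigned long long cr4, bool cs_long)
{
	if ((efer & EFER_LME) && (cr0 & CR0_PG))
		return (cr4 & CR4_PAE) && (efer & EFER_LMA);
	return !(efer & EFER_LMA) && !cs_long;
}

int main(void)
{
	printf("64-bit, consistent : %d\n",
	       sregs_valid(EFER_LME | EFER_LMA, CR0_PG, CR4_PAE, true));
	printf("64-bit, missing PAE: %d\n",
	       sregs_valid(EFER_LME | EFER_LMA, CR0_PG, 0, true));
	return 0;
}
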
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 553f8fd23cc4..4846eff7e4c8 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
107 delay = min_t(u64, MWAITX_MAX_LOOPS, loops); 107 delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
108 108
109 /* 109 /*
110 * Use cpu_tss as a cacheline-aligned, seldomly 110 * Use cpu_tss_rw as a cacheline-aligned, seldom
111 * accessed per-cpu variable as the monitor target. 111 * accessed per-cpu variable as the monitor target.
112 */ 112 */
113 __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); 113 __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
114 114
115 /* 115 /*
116 * AMD, like Intel, supports the EAX hint and EAX=0xf 116 * AMD, like Intel, supports the EAX hint and EAX=0xf
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 8e13b8cc6bed..52195ee3f6d5 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg
10endif 10endif
11 11
12obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 12obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
13 pat.o pgtable.o physaddr.o setup_nx.o tlb.o 13 pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
14 14
15# Make sure __phys_addr has no stackprotector 15# Make sure __phys_addr has no stackprotector
16nostackp := $(call cc-option, -fno-stack-protector) 16nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644
index 000000000000..fe814fd5e014
--- /dev/null
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -0,0 +1,139 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#include <linux/spinlock.h>
4#include <linux/percpu.h>
5
6#include <asm/cpu_entry_area.h>
7#include <asm/pgtable.h>
8#include <asm/fixmap.h>
9#include <asm/desc.h>
10
11static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
12
13#ifdef CONFIG_X86_64
14static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
15 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
16#endif
17
18struct cpu_entry_area *get_cpu_entry_area(int cpu)
19{
20 unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
21 BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
22
23 return (struct cpu_entry_area *) va;
24}
25EXPORT_SYMBOL(get_cpu_entry_area);
26
27void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
28{
29 unsigned long va = (unsigned long) cea_vaddr;
30
31 set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
32}
33
34static void __init
35cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
36{
37 for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
38 cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
39}
40
41/* Setup the fixmap mappings only once per-processor */
42static void __init setup_cpu_entry_area(int cpu)
43{
44#ifdef CONFIG_X86_64
45 extern char _entry_trampoline[];
46
47 /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
48 pgprot_t gdt_prot = PAGE_KERNEL_RO;
49 pgprot_t tss_prot = PAGE_KERNEL_RO;
50#else
51 /*
52 * On native 32-bit systems, the GDT cannot be read-only because
53 * our double fault handler uses a task gate, and entering through
54 * a task gate needs to change an available TSS to busy. If the
55 * GDT is read-only, that will triple fault. The TSS cannot be
56 * read-only because the CPU writes to it on task switches.
57 *
58 * On Xen PV, the GDT must be read-only because the hypervisor
59 * requires it.
60 */
61 pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
62 PAGE_KERNEL_RO : PAGE_KERNEL;
63 pgprot_t tss_prot = PAGE_KERNEL;
64#endif
65
66 cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
67 gdt_prot);
68
69 cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
70 per_cpu_ptr(&entry_stack_storage, cpu), 1,
71 PAGE_KERNEL);
72
73 /*
74 * The Intel SDM says (Volume 3, 7.2.1):
75 *
76 * Avoid placing a page boundary in the part of the TSS that the
77 * processor reads during a task switch (the first 104 bytes). The
78 * processor may not correctly perform address translations if a
79 * boundary occurs in this area. During a task switch, the processor
80 * reads and writes into the first 104 bytes of each TSS (using
81 * contiguous physical addresses beginning with the physical address
82 * of the first byte of the TSS). So, after TSS access begins, if
83 * part of the 104 bytes is not physically contiguous, the processor
84 * will access incorrect information without generating a page-fault
85 * exception.
86 *
87 * There are also a lot of errata involving the TSS spanning a page
88 * boundary. Assert that we're not doing that.
89 */
90 BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
91 offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
92 BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
93 cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
94 &per_cpu(cpu_tss_rw, cpu),
95 sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
96
97#ifdef CONFIG_X86_32
98 per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
99#endif
100
101#ifdef CONFIG_X86_64
102 BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
103 BUILD_BUG_ON(sizeof(exception_stacks) !=
104 sizeof(((struct cpu_entry_area *)0)->exception_stacks));
105 cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
106 &per_cpu(exception_stacks, cpu),
107 sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
108
109 cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
110 __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
111#endif
112}
113
114static __init void setup_cpu_entry_area_ptes(void)
115{
116#ifdef CONFIG_X86_32
117 unsigned long start, end;
118
119 BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
120 BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
121
122 start = CPU_ENTRY_AREA_BASE;
123 end = start + CPU_ENTRY_AREA_MAP_SIZE;
124
125 /* Careful here: start + PMD_SIZE might wrap around */
126 for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
127 populate_extra_pte(start);
128#endif
129}
130
131void __init setup_cpu_entry_areas(void)
132{
133 unsigned int cpu;
134
135 setup_cpu_entry_area_ptes();
136
137 for_each_possible_cpu(cpu)
138 setup_cpu_entry_area(cpu);
139}
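
Two pieces of the new file are easy to check in isolation: the per-CPU virtual-address arithmetic in get_cpu_entry_area() and the BUILD_BUG_ON() trick that asserts the hardware TSS never straddles a page. A minimal userspace sketch follows; the base, size and struct layout are placeholders, not the kernel's real cpu_entry_area layout.

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE  4096UL
#define PAGE_MASK  (~(PAGE_SIZE - 1))
#define AREA_BASE  0xfffff00000000000UL   /* assumed base, stands in for CPU_ENTRY_AREA_PER_CPU */
#define AREA_SIZE  (8 * PAGE_SIZE)        /* assumed per-CPU size */

/* Toy stand-in for tss_struct: only the "hardware" part must stay in one page. */
struct toy_tss { char pad[64]; char hw_tss[104]; };

#define offsetofend(t, m) (offsetof(t, m) + sizeof(((t *)0)->m))

int main(void)
{
	unsigned long start = offsetof(struct toy_tss, hw_tss);
	unsigned long end   = offsetofend(struct toy_tss, hw_tss);

	/* Same VA arithmetic as get_cpu_entry_area(): base + cpu * size. */
	for (int cpu = 0; cpu < 4; cpu++)
		printf("cpu %d entry area at %#lx\n", cpu, AREA_BASE + cpu * AREA_SIZE);

	/* Same trick as the BUILD_BUG_ON(): if start and end agree in every bit
	 * above the page offset, the member cannot straddle a page boundary. */
	printf("hw_tss %s a page boundary\n",
	       ((start ^ end) & PAGE_MASK) ? "crosses" : "stays within");
	return 0;
}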
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 5e3ac6fe6c9e..43dedbfb7257 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -44,10 +44,12 @@ struct addr_marker {
44 unsigned long max_lines; 44 unsigned long max_lines;
45}; 45};
46 46
47/* indices for address_markers; keep sync'd w/ address_markers below */ 47/* Address space markers hints */
48
49#ifdef CONFIG_X86_64
50
48enum address_markers_idx { 51enum address_markers_idx {
49 USER_SPACE_NR = 0, 52 USER_SPACE_NR = 0,
50#ifdef CONFIG_X86_64
51 KERNEL_SPACE_NR, 53 KERNEL_SPACE_NR,
52 LOW_KERNEL_NR, 54 LOW_KERNEL_NR,
53 VMALLOC_START_NR, 55 VMALLOC_START_NR,
@@ -56,56 +58,74 @@ enum address_markers_idx {
56 KASAN_SHADOW_START_NR, 58 KASAN_SHADOW_START_NR,
57 KASAN_SHADOW_END_NR, 59 KASAN_SHADOW_END_NR,
58#endif 60#endif
59# ifdef CONFIG_X86_ESPFIX64 61 CPU_ENTRY_AREA_NR,
62#ifdef CONFIG_X86_ESPFIX64
60 ESPFIX_START_NR, 63 ESPFIX_START_NR,
61# endif 64#endif
65#ifdef CONFIG_EFI
66 EFI_END_NR,
67#endif
62 HIGH_KERNEL_NR, 68 HIGH_KERNEL_NR,
63 MODULES_VADDR_NR, 69 MODULES_VADDR_NR,
64 MODULES_END_NR, 70 MODULES_END_NR,
65#else 71 FIXADDR_START_NR,
72 END_OF_SPACE_NR,
73};
74
75static struct addr_marker address_markers[] = {
76 [USER_SPACE_NR] = { 0, "User Space" },
77 [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" },
78 [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" },
79 [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
80 [VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
81#ifdef CONFIG_KASAN
82 [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
83 [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
84#endif
 85 [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE, "CPU entry area" },
86#ifdef CONFIG_X86_ESPFIX64
87 [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
88#endif
89#ifdef CONFIG_EFI
90 [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" },
91#endif
92 [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" },
93 [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" },
94 [MODULES_END_NR] = { MODULES_END, "End Modules" },
95 [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" },
96 [END_OF_SPACE_NR] = { -1, NULL }
97};
98
99#else /* CONFIG_X86_64 */
100
101enum address_markers_idx {
102 USER_SPACE_NR = 0,
66 KERNEL_SPACE_NR, 103 KERNEL_SPACE_NR,
67 VMALLOC_START_NR, 104 VMALLOC_START_NR,
68 VMALLOC_END_NR, 105 VMALLOC_END_NR,
69# ifdef CONFIG_HIGHMEM 106#ifdef CONFIG_HIGHMEM
70 PKMAP_BASE_NR, 107 PKMAP_BASE_NR,
71# endif
72 FIXADDR_START_NR,
73#endif 108#endif
109 CPU_ENTRY_AREA_NR,
110 FIXADDR_START_NR,
111 END_OF_SPACE_NR,
74}; 112};
75 113
76/* Address space markers hints */
77static struct addr_marker address_markers[] = { 114static struct addr_marker address_markers[] = {
78 { 0, "User Space" }, 115 [USER_SPACE_NR] = { 0, "User Space" },
79#ifdef CONFIG_X86_64 116 [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" },
80 { 0x8000000000000000UL, "Kernel Space" }, 117 [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
81 { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, 118 [VMALLOC_END_NR] = { 0UL, "vmalloc() End" },
82 { 0/* VMALLOC_START */, "vmalloc() Area" }, 119#ifdef CONFIG_HIGHMEM
83 { 0/* VMEMMAP_START */, "Vmemmap" }, 120 [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
84#ifdef CONFIG_KASAN
85 { KASAN_SHADOW_START, "KASAN shadow" },
86 { KASAN_SHADOW_END, "KASAN shadow end" },
87#endif 121#endif
88# ifdef CONFIG_X86_ESPFIX64 122 [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" },
89 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, 123 [FIXADDR_START_NR] = { 0UL, "Fixmap area" },
90# endif 124 [END_OF_SPACE_NR] = { -1, NULL }
91# ifdef CONFIG_EFI
92 { EFI_VA_END, "EFI Runtime Services" },
93# endif
94 { __START_KERNEL_map, "High Kernel Mapping" },
95 { MODULES_VADDR, "Modules" },
96 { MODULES_END, "End Modules" },
97#else
98 { PAGE_OFFSET, "Kernel Mapping" },
99 { 0/* VMALLOC_START */, "vmalloc() Area" },
100 { 0/*VMALLOC_END*/, "vmalloc() End" },
101# ifdef CONFIG_HIGHMEM
102 { 0/*PKMAP_BASE*/, "Persistent kmap() Area" },
103# endif
104 { 0/*FIXADDR_START*/, "Fixmap Area" },
105#endif
106 { -1, NULL } /* End of list */
107}; 125};
108 126
127#endif /* !CONFIG_X86_64 */
128
109/* Multipliers for offsets within the PTEs */ 129/* Multipliers for offsets within the PTEs */
110#define PTE_LEVEL_MULT (PAGE_SIZE) 130#define PTE_LEVEL_MULT (PAGE_SIZE)
111#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) 131#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
@@ -140,7 +160,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
140 static const char * const level_name[] = 160 static const char * const level_name[] =
141 { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; 161 { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
142 162
143 if (!pgprot_val(prot)) { 163 if (!(pr & _PAGE_PRESENT)) {
144 /* Not present */ 164 /* Not present */
145 pt_dump_cont_printf(m, dmsg, " "); 165 pt_dump_cont_printf(m, dmsg, " ");
146 } else { 166 } else {
@@ -525,8 +545,8 @@ static int __init pt_dump_init(void)
525 address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; 545 address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
526# endif 546# endif
527 address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; 547 address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
548 address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
528#endif 549#endif
529
530 return 0; 550 return 0;
531} 551}
532__initcall(pt_dump_init); 552__initcall(pt_dump_init);
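
The rewritten marker tables lean on designated initializers so the enum indices and the array entries cannot drift apart when optional entries sit under #ifdef. A tiny standalone example of the same idiom (all names here are invented, not the kernel's):

#include <stdio.h>

enum marker_idx { USER_NR, KERNEL_NR, FIXMAP_NR, END_NR };

struct marker { unsigned long start; const char *name; };

/* Each slot is tied to its enum index, so reordering the enum or dropping
 * an optional entry cannot silently shift the rest of the table. */
static struct marker markers[] = {
	[USER_NR]   = { 0x0,        "User Space" },
	[KERNEL_NR] = { 0xc0000000, "Kernel Mapping" },
	[FIXMAP_NR] = { 0xfff00000, "Fixmap Area" },
	[END_NR]    = { -1UL,       NULL },
};

int main(void)
{
	for (int i = 0; markers[i].name; i++)
		printf("%#12lx  %s\n", markers[i].start, markers[i].name);
	return 0;
}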
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index febf6980e653..06fe3d51d385 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
860 if (!printk_ratelimit()) 860 if (!printk_ratelimit())
861 return; 861 return;
862 862
863 printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx", 863 printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
864 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 864 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
865 tsk->comm, task_pid_nr(tsk), address, 865 tsk->comm, task_pid_nr(tsk), address,
866 (void *)regs->ip, (void *)regs->sp, error_code); 866 (void *)regs->ip, (void *)regs->sp, error_code);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8a64a6f2848d..135c9a7898c7 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -50,6 +50,7 @@
50#include <asm/setup.h> 50#include <asm/setup.h>
51#include <asm/set_memory.h> 51#include <asm/set_memory.h>
52#include <asm/page_types.h> 52#include <asm/page_types.h>
53#include <asm/cpu_entry_area.h>
53#include <asm/init.h> 54#include <asm/init.h>
54 55
55#include "mm_internal.h" 56#include "mm_internal.h"
@@ -766,6 +767,7 @@ void __init mem_init(void)
766 mem_init_print_info(NULL); 767 mem_init_print_info(NULL);
767 printk(KERN_INFO "virtual kernel memory layout:\n" 768 printk(KERN_INFO "virtual kernel memory layout:\n"
768 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 769 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
770 " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n"
769#ifdef CONFIG_HIGHMEM 771#ifdef CONFIG_HIGHMEM
770 " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 772 " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
771#endif 773#endif
@@ -777,6 +779,10 @@ void __init mem_init(void)
777 FIXADDR_START, FIXADDR_TOP, 779 FIXADDR_START, FIXADDR_TOP,
778 (FIXADDR_TOP - FIXADDR_START) >> 10, 780 (FIXADDR_TOP - FIXADDR_START) >> 10,
779 781
782 CPU_ENTRY_AREA_BASE,
783 CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
784 CPU_ENTRY_AREA_MAP_SIZE >> 10,
785
780#ifdef CONFIG_HIGHMEM 786#ifdef CONFIG_HIGHMEM
781 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, 787 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
782 (LAST_PKMAP*PAGE_SIZE) >> 10, 788 (LAST_PKMAP*PAGE_SIZE) >> 10,
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 99dfed6dfef8..47388f0c0e59 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -15,6 +15,7 @@
15#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
16#include <asm/sections.h> 16#include <asm/sections.h>
17#include <asm/pgtable.h> 17#include <asm/pgtable.h>
18#include <asm/cpu_entry_area.h>
18 19
19extern struct range pfn_mapped[E820_MAX_ENTRIES]; 20extern struct range pfn_mapped[E820_MAX_ENTRIES];
20 21
@@ -277,6 +278,7 @@ void __init kasan_early_init(void)
277void __init kasan_init(void) 278void __init kasan_init(void)
278{ 279{
279 int i; 280 int i;
281 void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
280 282
281#ifdef CONFIG_KASAN_INLINE 283#ifdef CONFIG_KASAN_INLINE
282 register_die_notifier(&kasan_die_notifier); 284 register_die_notifier(&kasan_die_notifier);
@@ -321,16 +323,33 @@ void __init kasan_init(void)
321 map_range(&pfn_mapped[i]); 323 map_range(&pfn_mapped[i]);
322 } 324 }
323 325
326 shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
327 shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
328 shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
329 PAGE_SIZE);
330
331 shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
332 CPU_ENTRY_AREA_MAP_SIZE);
333 shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
334 shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
335 PAGE_SIZE);
336
324 kasan_populate_zero_shadow( 337 kasan_populate_zero_shadow(
325 kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), 338 kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
326 kasan_mem_to_shadow((void *)__START_KERNEL_map)); 339 shadow_cpu_entry_begin);
340
341 kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
342 (unsigned long)shadow_cpu_entry_end, 0);
343
344 kasan_populate_zero_shadow(shadow_cpu_entry_end,
345 kasan_mem_to_shadow((void *)__START_KERNEL_map));
327 346
328 kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), 347 kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
329 (unsigned long)kasan_mem_to_shadow(_end), 348 (unsigned long)kasan_mem_to_shadow(_end),
330 early_pfn_to_nid(__pa(_stext))); 349 early_pfn_to_nid(__pa(_stext)));
331 350
332 kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), 351 kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
333 (void *)KASAN_SHADOW_END); 352 (void *)KASAN_SHADOW_END);
334 353
335 load_cr3(init_top_pgt); 354 load_cr3(init_top_pgt);
336 __flush_tlb_all(); 355 __flush_tlb_all();
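
The new shadow range for the CPU entry area is the usual KASAN address transform plus page rounding. A hedged sketch of that arithmetic; the scale, shadow offset and the base/size values are typical x86-64 numbers assumed for illustration, not taken from this patch.

#include <stdio.h>

#define PAGE_SIZE            4096UL
#define KASAN_SHADOW_SCALE   3                      /* one shadow byte covers 8 bytes */
#define KASAN_SHADOW_OFFSET  0xdffffc0000000000UL   /* assumed, arch/config specific */

static unsigned long mem_to_shadow(unsigned long addr)
{
	return (addr >> KASAN_SHADOW_SCALE) + KASAN_SHADOW_OFFSET;
}

static unsigned long round_down_page(unsigned long x) { return x & ~(PAGE_SIZE - 1); }
static unsigned long round_up_page(unsigned long x)   { return (x + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); }

int main(void)
{
	/* Placeholder values standing in for CPU_ENTRY_AREA_BASE / _MAP_SIZE. */
	unsigned long base = 0xfffffe0000000000UL;
	unsigned long size = 0x200000UL;

	unsigned long shadow_begin = round_down_page(mem_to_shadow(base));
	unsigned long shadow_end   = round_up_page(mem_to_shadow(base + size));

	printf("shadow for cpu entry area: %#lx - %#lx\n", shadow_begin, shadow_end);
	return 0;
}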
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6b9bf023a700..c3c5274410a9 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -10,6 +10,7 @@
10#include <linux/pagemap.h> 10#include <linux/pagemap.h>
11#include <linux/spinlock.h> 11#include <linux/spinlock.h>
12 12
13#include <asm/cpu_entry_area.h>
13#include <asm/pgtable.h> 14#include <asm/pgtable.h>
14#include <asm/pgalloc.h> 15#include <asm/pgalloc.h>
15#include <asm/fixmap.h> 16#include <asm/fixmap.h>
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 3118392cdf75..0a1be3adc97e 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -128,7 +128,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
128 * isn't free. 128 * isn't free.
129 */ 129 */
130#ifdef CONFIG_DEBUG_VM 130#ifdef CONFIG_DEBUG_VM
131 if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) { 131 if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
132 /* 132 /*
133 * If we were to BUG here, we'd be very likely to kill 133 * If we were to BUG here, we'd be very likely to kill
134 * the system so hard that we don't see the call trace. 134 * the system so hard that we don't see the call trace.
@@ -195,7 +195,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
195 if (need_flush) { 195 if (need_flush) {
196 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); 196 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
197 this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); 197 this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
198 write_cr3(build_cr3(next, new_asid)); 198 write_cr3(build_cr3(next->pgd, new_asid));
199 199
200 /* 200 /*
201 * NB: This gets called via leave_mm() in the idle path 201 * NB: This gets called via leave_mm() in the idle path
@@ -208,7 +208,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
208 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); 208 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
209 } else { 209 } else {
210 /* The new ASID is already up to date. */ 210 /* The new ASID is already up to date. */
211 write_cr3(build_cr3_noflush(next, new_asid)); 211 write_cr3(build_cr3_noflush(next->pgd, new_asid));
212 212
213 /* See above wrt _rcuidle. */ 213 /* See above wrt _rcuidle. */
214 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); 214 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
@@ -288,7 +288,7 @@ void initialize_tlbstate_and_flush(void)
288 !(cr4_read_shadow() & X86_CR4_PCIDE)); 288 !(cr4_read_shadow() & X86_CR4_PCIDE));
289 289
290 /* Force ASID 0 and force a TLB flush. */ 290 /* Force ASID 0 and force a TLB flush. */
291 write_cr3(build_cr3(mm, 0)); 291 write_cr3(build_cr3(mm->pgd, 0));
292 292
293 /* Reinitialize tlbstate. */ 293 /* Reinitialize tlbstate. */
294 this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); 294 this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
@@ -551,7 +551,7 @@ static void do_kernel_range_flush(void *info)
551 551
552 /* flush range by one by one 'invlpg' */ 552 /* flush range by one by one 'invlpg' */
553 for (addr = f->start; addr < f->end; addr += PAGE_SIZE) 553 for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
554 __flush_tlb_single(addr); 554 __flush_tlb_one(addr);
555} 555}
556 556
557void flush_tlb_kernel_range(unsigned long start, unsigned long end) 557void flush_tlb_kernel_range(unsigned long start, unsigned long end)
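
The build_cr3() callers change only in what they pass (the pgd pointer instead of the whole mm); the value composed is still the page-table root plus an ASID-derived PCID in the low bits. A simplified sketch of that composition; the "+1" PCID encoding and the no-flush bit are assumptions for illustration, the real encoding lives in asm/tlbflush.h.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define CR3_NOFLUSH  (UINT64_C(1) << 63)   /* assumed "don't flush on load" bit */

static uint64_t build_cr3(uint64_t pgd_pa, uint16_t asid)
{
	/* Hardware PCID taken as asid + 1 here (assumption for the sketch). */
	return pgd_pa | (uint64_t)(asid + 1);
}

static uint64_t build_cr3_noflush(uint64_t pgd_pa, uint16_t asid)
{
	return build_cr3(pgd_pa, asid) | CR3_NOFLUSH;
}

int main(void)
{
	/* Toy stand-in: in the kernel this would be __pa(mm->pgd). */
	uint64_t pgd_pa = 0x12345000;   /* page aligned, so the low 12 bits are free for the PCID */

	printf("cr3 (flushing):  %#" PRIx64 "\n", build_cr3(pgd_pa, 3));
	printf("cr3 (no-flush):  %#" PRIx64 "\n", build_cr3_noflush(pgd_pa, 3));
	return 0;
}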
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index f44c0bc95aa2..8538a6723171 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
299 local_flush_tlb(); 299 local_flush_tlb();
300 stat->d_alltlb++; 300 stat->d_alltlb++;
301 } else { 301 } else {
302 __flush_tlb_one(msg->address); 302 __flush_tlb_single(msg->address);
303 stat->d_onetlb++; 303 stat->d_onetlb++;
304 } 304 }
305 stat->d_requestee++; 305 stat->d_requestee++;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 36a28eddb435..a7d966964c6f 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -152,17 +152,19 @@ static void do_fpu_end(void)
152static void fix_processor_context(void) 152static void fix_processor_context(void)
153{ 153{
154 int cpu = smp_processor_id(); 154 int cpu = smp_processor_id();
155 struct tss_struct *t = &per_cpu(cpu_tss, cpu);
156#ifdef CONFIG_X86_64 155#ifdef CONFIG_X86_64
157 struct desc_struct *desc = get_cpu_gdt_rw(cpu); 156 struct desc_struct *desc = get_cpu_gdt_rw(cpu);
158 tss_desc tss; 157 tss_desc tss;
159#endif 158#endif
160 set_tss_desc(cpu, t); /* 159
161 * This just modifies memory; should not be 160 /*
162 * necessary. But... This is necessary, because 161 * We need to reload TR, which requires that we change the
163 * 386 hardware has concept of busy TSS or some 162 * GDT entry to indicate "available" first.
164 * similar stupidity. 163 *
165 */ 164 * XXX: This could probably all be replaced by a call to
165 * force_reload_TR().
166 */
167 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
166 168
167#ifdef CONFIG_X86_64 169#ifdef CONFIG_X86_64
168 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); 170 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
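
The comment about making the GDT entry "available" before reloading TR refers to the TSS descriptor type field: ltr faults on a descriptor already marked busy, so the entry is rewritten as available first and the CPU sets it busy again when TR is loaded. A toy sketch of just that type-field flip (the struct is invented, only the two type values are real):

#include <stdio.h>
#include <stdint.h>

#define DESC_TSS_AVAIL  0x9   /* available 64-bit TSS */
#define DESC_TSS_BUSY   0xB   /* busy 64-bit TSS: ltr would #GP on this */

struct toy_desc { uint8_t type; };   /* only the 4-bit type nibble matters here */

int main(void)
{
	struct toy_desc tss_desc = { .type = DESC_TSS_BUSY };

	/* What fix_processor_context() effectively does before reloading TR. */
	tss_desc.type = DESC_TSS_AVAIL;
	printf("TSS descriptor type now %#x, TR can be reloaded\n", tss_desc.type);
	return 0;
}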
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d669e9d89001..c9081c6671f0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1,8 +1,12 @@
1#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
2#include <linux/bootmem.h>
3#endif
1#include <linux/cpu.h> 4#include <linux/cpu.h>
2#include <linux/kexec.h> 5#include <linux/kexec.h>
3 6
4#include <xen/features.h> 7#include <xen/features.h>
5#include <xen/page.h> 8#include <xen/page.h>
9#include <xen/interface/memory.h>
6 10
7#include <asm/xen/hypercall.h> 11#include <asm/xen/hypercall.h>
8#include <asm/xen/hypervisor.h> 12#include <asm/xen/hypervisor.h>
@@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num)
331} 335}
332EXPORT_SYMBOL(xen_arch_unregister_cpu); 336EXPORT_SYMBOL(xen_arch_unregister_cpu);
333#endif 337#endif
338
339#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
340void __init arch_xen_balloon_init(struct resource *hostmem_resource)
341{
342 struct xen_memory_map memmap;
343 int rc;
344 unsigned int i, last_guest_ram;
345 phys_addr_t max_addr = PFN_PHYS(max_pfn);
346 struct e820_table *xen_e820_table;
347 const struct e820_entry *entry;
348 struct resource *res;
349
350 if (!xen_initial_domain())
351 return;
352
353 xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
354 if (!xen_e820_table)
355 return;
356
357 memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
358 set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
359 rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
360 if (rc) {
361 pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
362 goto out;
363 }
364
365 last_guest_ram = 0;
366 for (i = 0; i < memmap.nr_entries; i++) {
367 if (xen_e820_table->entries[i].addr >= max_addr)
368 break;
369 if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
370 last_guest_ram = i;
371 }
372
373 entry = &xen_e820_table->entries[last_guest_ram];
374 if (max_addr >= entry->addr + entry->size)
375 goto out; /* No unallocated host RAM. */
376
377 hostmem_resource->start = max_addr;
378 hostmem_resource->end = entry->addr + entry->size;
379
380 /*
381 * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
382 * as unavailable. The rest of that region can be used for hotplug-based
383 * ballooning.
384 */
385 for (; i < memmap.nr_entries; i++) {
386 entry = &xen_e820_table->entries[i];
387
388 if (entry->type == E820_TYPE_RAM)
389 continue;
390
391 if (entry->addr >= hostmem_resource->end)
392 break;
393
394 res = kzalloc(sizeof(*res), GFP_KERNEL);
395 if (!res)
396 goto out;
397
398 res->name = "Unavailable host RAM";
399 res->start = entry->addr;
400 res->end = (entry->addr + entry->size < hostmem_resource->end) ?
401 entry->addr + entry->size : hostmem_resource->end;
402 rc = insert_resource(hostmem_resource, res);
403 if (rc) {
404 pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
405 __func__, res->start, res->end, rc);
406 kfree(res);
407 goto out;
408 }
409 }
410
411 out:
412 kfree(xen_e820_table);
413}
414#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index f2414c6c5e7c..c047f42552e1 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -88,6 +88,8 @@
88#include "multicalls.h" 88#include "multicalls.h"
89#include "pmu.h" 89#include "pmu.h"
90 90
91#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
92
91void *xen_initial_gdt; 93void *xen_initial_gdt;
92 94
93static int xen_cpu_up_prepare_pv(unsigned int cpu); 95static int xen_cpu_up_prepare_pv(unsigned int cpu);
@@ -826,7 +828,7 @@ static void xen_load_sp0(unsigned long sp0)
826 mcs = xen_mc_entry(0); 828 mcs = xen_mc_entry(0);
827 MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); 829 MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
828 xen_mc_issue(PARAVIRT_LAZY_CPU); 830 xen_mc_issue(PARAVIRT_LAZY_CPU);
829 this_cpu_write(cpu_tss.x86_tss.sp0, sp0); 831 this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
830} 832}
831 833
832void xen_set_iopl_mask(unsigned mask) 834void xen_set_iopl_mask(unsigned mask)
@@ -1258,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
1258 __userpte_alloc_gfp &= ~__GFP_HIGHMEM; 1260 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
1259 1261
1260 /* Work out if we support NX */ 1262 /* Work out if we support NX */
1263 get_cpu_cap(&boot_cpu_data);
1261 x86_configure_nx(); 1264 x86_configure_nx();
1262 1265
1263 /* Get mfn list */ 1266 /* Get mfn list */
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index fc048ec686e7..4d62c071b166 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1902,6 +1902,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1902 /* Graft it onto L4[511][510] */ 1902 /* Graft it onto L4[511][510] */
1903 copy_page(level2_kernel_pgt, l2); 1903 copy_page(level2_kernel_pgt, l2);
1904 1904
1905 /*
1906 * Zap execute permission from the ident map. Due to the sharing of
1907 * L1 entries we need to do this in the L2.
1908 */
1909 if (__supported_pte_mask & _PAGE_NX) {
1910 for (i = 0; i < PTRS_PER_PMD; ++i) {
1911 if (pmd_none(level2_ident_pgt[i]))
1912 continue;
1913 level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
1914 }
1915 }
1916
1905 /* Copy the initial P->M table mappings if necessary. */ 1917 /* Copy the initial P->M table mappings if necessary. */
1906 i = pgd_index(xen_start_info->mfn_list); 1918 i = pgd_index(xen_start_info->mfn_list);
1907 if (i && i < pgd_index(__START_KERNEL_map)) 1919 if (i && i < pgd_index(__START_KERNEL_map))
@@ -2261,7 +2273,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2261 2273
2262 switch (idx) { 2274 switch (idx) {
2263 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: 2275 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
2264 case FIX_RO_IDT:
2265#ifdef CONFIG_X86_32 2276#ifdef CONFIG_X86_32
2266 case FIX_WP_TEST: 2277 case FIX_WP_TEST:
2267# ifdef CONFIG_HIGHMEM 2278# ifdef CONFIG_HIGHMEM
@@ -2272,7 +2283,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2272#endif 2283#endif
2273 case FIX_TEXT_POKE0: 2284 case FIX_TEXT_POKE0:
2274 case FIX_TEXT_POKE1: 2285 case FIX_TEXT_POKE1:
2275 case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
2276 /* All local page mappings */ 2286 /* All local page mappings */
2277 pte = pfn_pte(phys, prot); 2287 pte = pfn_pte(phys, prot);
2278 break; 2288 break;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c114ca767b3b..6e0d2086eacb 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -808,7 +808,6 @@ char * __init xen_memory_setup(void)
808 addr = xen_e820_table.entries[0].addr; 808 addr = xen_e820_table.entries[0].addr;
809 size = xen_e820_table.entries[0].size; 809 size = xen_e820_table.entries[0].size;
810 while (i < xen_e820_table.nr_entries) { 810 while (i < xen_e820_table.nr_entries) {
811 bool discard = false;
812 811
813 chunk_size = size; 812 chunk_size = size;
814 type = xen_e820_table.entries[i].type; 813 type = xen_e820_table.entries[i].type;
@@ -824,11 +823,10 @@ char * __init xen_memory_setup(void)
824 xen_add_extra_mem(pfn_s, n_pfns); 823 xen_add_extra_mem(pfn_s, n_pfns);
825 xen_max_p2m_pfn = pfn_s + n_pfns; 824 xen_max_p2m_pfn = pfn_s + n_pfns;
826 } else 825 } else
827 discard = true; 826 type = E820_TYPE_UNUSABLE;
828 } 827 }
829 828
830 if (!discard) 829 xen_align_and_add_e820_region(addr, chunk_size, type);
831 xen_align_and_add_e820_region(addr, chunk_size, type);
832 830
833 addr += chunk_size; 831 addr += chunk_size;
834 size -= chunk_size; 832 size -= chunk_size;
diff --git a/block/bio.c b/block/bio.c
index 8bfdea58159b..9ef6cf3addb3 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
599 bio->bi_disk = bio_src->bi_disk; 599 bio->bi_disk = bio_src->bi_disk;
600 bio->bi_partno = bio_src->bi_partno; 600 bio->bi_partno = bio_src->bi_partno;
601 bio_set_flag(bio, BIO_CLONED); 601 bio_set_flag(bio, BIO_CLONED);
602 if (bio_flagged(bio_src, BIO_THROTTLED))
603 bio_set_flag(bio, BIO_THROTTLED);
602 bio->bi_opf = bio_src->bi_opf; 604 bio->bi_opf = bio_src->bi_opf;
603 bio->bi_write_hint = bio_src->bi_write_hint; 605 bio->bi_write_hint = bio_src->bi_write_hint;
604 bio->bi_iter = bio_src->bi_iter; 606 bio->bi_iter = bio_src->bi_iter;
diff --git a/block/blk-map.c b/block/blk-map.c
index b21f8e86f120..d3a94719f03f 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -12,22 +12,29 @@
12#include "blk.h" 12#include "blk.h"
13 13
14/* 14/*
15 * Append a bio to a passthrough request. Only works can be merged into 15 * Append a bio to a passthrough request. Only works if the bio can be merged
16 * the request based on the driver constraints. 16 * into the request based on the driver constraints.
17 */ 17 */
18int blk_rq_append_bio(struct request *rq, struct bio *bio) 18int blk_rq_append_bio(struct request *rq, struct bio **bio)
19{ 19{
20 blk_queue_bounce(rq->q, &bio); 20 struct bio *orig_bio = *bio;
21
22 blk_queue_bounce(rq->q, bio);
21 23
22 if (!rq->bio) { 24 if (!rq->bio) {
23 blk_rq_bio_prep(rq->q, rq, bio); 25 blk_rq_bio_prep(rq->q, rq, *bio);
24 } else { 26 } else {
25 if (!ll_back_merge_fn(rq->q, rq, bio)) 27 if (!ll_back_merge_fn(rq->q, rq, *bio)) {
28 if (orig_bio != *bio) {
29 bio_put(*bio);
30 *bio = orig_bio;
31 }
26 return -EINVAL; 32 return -EINVAL;
33 }
27 34
28 rq->biotail->bi_next = bio; 35 rq->biotail->bi_next = *bio;
29 rq->biotail = bio; 36 rq->biotail = *bio;
30 rq->__data_len += bio->bi_iter.bi_size; 37 rq->__data_len += (*bio)->bi_iter.bi_size;
31 } 38 }
32 39
33 return 0; 40 return 0;
@@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
73 * We link the bounce buffer in and could have to traverse it 80 * We link the bounce buffer in and could have to traverse it
74 * later so we have to get a ref to prevent it from being freed 81 * later so we have to get a ref to prevent it from being freed
75 */ 82 */
76 ret = blk_rq_append_bio(rq, bio); 83 ret = blk_rq_append_bio(rq, &bio);
77 bio_get(bio);
78 if (ret) { 84 if (ret) {
79 bio_endio(bio);
80 __blk_rq_unmap_user(orig_bio); 85 __blk_rq_unmap_user(orig_bio);
81 bio_put(bio);
82 return ret; 86 return ret;
83 } 87 }
88 bio_get(bio);
84 89
85 return 0; 90 return 0;
86} 91}
@@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
213 int reading = rq_data_dir(rq) == READ; 218 int reading = rq_data_dir(rq) == READ;
214 unsigned long addr = (unsigned long) kbuf; 219 unsigned long addr = (unsigned long) kbuf;
215 int do_copy = 0; 220 int do_copy = 0;
216 struct bio *bio; 221 struct bio *bio, *orig_bio;
217 int ret; 222 int ret;
218 223
219 if (len > (queue_max_hw_sectors(q) << 9)) 224 if (len > (queue_max_hw_sectors(q) << 9))
@@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
236 if (do_copy) 241 if (do_copy)
237 rq->rq_flags |= RQF_COPY_USER; 242 rq->rq_flags |= RQF_COPY_USER;
238 243
239 ret = blk_rq_append_bio(rq, bio); 244 orig_bio = bio;
245 ret = blk_rq_append_bio(rq, &bio);
240 if (unlikely(ret)) { 246 if (unlikely(ret)) {
241 /* request is too big */ 247 /* request is too big */
242 bio_put(bio); 248 bio_put(orig_bio);
243 return ret; 249 return ret;
244 } 250 }
245 251
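
The blk_rq_append_bio() change is the classic "pass a pointer to the pointer" fix: the callee may swap the caller's bio for a bounce-buffer clone, so the caller has to see the replacement and know which object to release on error. A generic sketch of the pattern, not the block-layer API itself:

#include <stdio.h>
#include <stdlib.h>

struct buf { const char *data; size_t len; };

/* May replace *bp with a private copy (think: bounce buffer). Because the
 * caller passed &bp, it keeps a handle to whichever object is in use. */
static int attach_buf(struct buf **bp, int need_copy)
{
	if (need_copy) {
		struct buf *copy = malloc(sizeof(*copy));
		if (!copy)
			return -1;
		*copy = **bp;        /* shallow copy is enough for the sketch */
		*bp = copy;          /* caller now refers to the replacement */
	}
	return 0;
}

int main(void)
{
	struct buf orig = { "payload", 7 };
	struct buf *b = &orig;

	if (attach_buf(&b, 1) == 0) {
		printf("using %s buffer\n", b == &orig ? "original" : "replacement");
		if (b != &orig)
			free(b);     /* cleanup paths must free the right object */
	}
	return 0;
}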
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 825bc29767e6..d19f416d6101 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2226,13 +2226,7 @@ again:
2226out_unlock: 2226out_unlock:
2227 spin_unlock_irq(q->queue_lock); 2227 spin_unlock_irq(q->queue_lock);
2228out: 2228out:
2229 /* 2229 bio_set_flag(bio, BIO_THROTTLED);
2230 * As multiple blk-throtls may stack in the same issue path, we
2231 * don't want bios to leave with the flag set. Clear the flag if
2232 * being issued.
2233 */
2234 if (!throttled)
2235 bio_clear_flag(bio, BIO_THROTTLED);
2236 2230
2237#ifdef CONFIG_BLK_DEV_THROTTLING_LOW 2231#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
2238 if (throttled || !td->track_bio_latency) 2232 if (throttled || !td->track_bio_latency)
diff --git a/block/bounce.c b/block/bounce.c
index fceb1a96480b..1d05c422c932 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
200 unsigned i = 0; 200 unsigned i = 0;
201 bool bounce = false; 201 bool bounce = false;
202 int sectors = 0; 202 int sectors = 0;
203 bool passthrough = bio_is_passthrough(*bio_orig);
203 204
204 bio_for_each_segment(from, *bio_orig, iter) { 205 bio_for_each_segment(from, *bio_orig, iter) {
205 if (i++ < BIO_MAX_PAGES) 206 if (i++ < BIO_MAX_PAGES)
@@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
210 if (!bounce) 211 if (!bounce)
211 return; 212 return;
212 213
213 if (sectors < bio_sectors(*bio_orig)) { 214 if (!passthrough && sectors < bio_sectors(*bio_orig)) {
214 bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split); 215 bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
215 bio_chain(bio, *bio_orig); 216 bio_chain(bio, *bio_orig);
216 generic_make_request(*bio_orig); 217 generic_make_request(*bio_orig);
217 *bio_orig = bio; 218 *bio_orig = bio;
218 } 219 }
219 bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set); 220 bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
221 bounce_bio_set);
220 222
221 bio_for_each_segment_all(to, bio, i) { 223 bio_for_each_segment_all(to, bio, i) {
222 struct page *page = to->bv_page; 224 struct page *page = to->bv_page;
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b4df317c2916..f95c60774ce8 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -100,9 +100,13 @@ struct kyber_hctx_data {
100 unsigned int cur_domain; 100 unsigned int cur_domain;
101 unsigned int batching; 101 unsigned int batching;
102 wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS]; 102 wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
103 struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
103 atomic_t wait_index[KYBER_NUM_DOMAINS]; 104 atomic_t wait_index[KYBER_NUM_DOMAINS];
104}; 105};
105 106
107static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
108 void *key);
109
106static int rq_sched_domain(const struct request *rq) 110static int rq_sched_domain(const struct request *rq)
107{ 111{
108 unsigned int op = rq->cmd_flags; 112 unsigned int op = rq->cmd_flags;
@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
385 389
386 for (i = 0; i < KYBER_NUM_DOMAINS; i++) { 390 for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
387 INIT_LIST_HEAD(&khd->rqs[i]); 391 INIT_LIST_HEAD(&khd->rqs[i]);
392 init_waitqueue_func_entry(&khd->domain_wait[i],
393 kyber_domain_wake);
394 khd->domain_wait[i].private = hctx;
388 INIT_LIST_HEAD(&khd->domain_wait[i].entry); 395 INIT_LIST_HEAD(&khd->domain_wait[i].entry);
389 atomic_set(&khd->wait_index[i], 0); 396 atomic_set(&khd->wait_index[i], 0);
390 } 397 }
@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
524 int nr; 531 int nr;
525 532
526 nr = __sbitmap_queue_get(domain_tokens); 533 nr = __sbitmap_queue_get(domain_tokens);
527 if (nr >= 0)
528 return nr;
529 534
530 /* 535 /*
531 * If we failed to get a domain token, make sure the hardware queue is 536 * If we failed to get a domain token, make sure the hardware queue is
532 * run when one becomes available. Note that this is serialized on 537 * run when one becomes available. Note that this is serialized on
533 * khd->lock, but we still need to be careful about the waker. 538 * khd->lock, but we still need to be careful about the waker.
534 */ 539 */
535 if (list_empty_careful(&wait->entry)) { 540 if (nr < 0 && list_empty_careful(&wait->entry)) {
536 init_waitqueue_func_entry(wait, kyber_domain_wake);
537 wait->private = hctx;
538 ws = sbq_wait_ptr(domain_tokens, 541 ws = sbq_wait_ptr(domain_tokens,
539 &khd->wait_index[sched_domain]); 542 &khd->wait_index[sched_domain]);
543 khd->domain_ws[sched_domain] = ws;
540 add_wait_queue(&ws->wait, wait); 544 add_wait_queue(&ws->wait, wait);
541 545
542 /* 546 /*
543 * Try again in case a token was freed before we got on the wait 547 * Try again in case a token was freed before we got on the wait
544 * queue. The waker may have already removed the entry from the 548 * queue.
545 * wait queue, but list_del_init() is okay with that.
546 */ 549 */
547 nr = __sbitmap_queue_get(domain_tokens); 550 nr = __sbitmap_queue_get(domain_tokens);
548 if (nr >= 0) { 551 }
549 unsigned long flags;
550 552
551 spin_lock_irqsave(&ws->wait.lock, flags); 553 /*
552 list_del_init(&wait->entry); 554 * If we got a token while we were on the wait queue, remove ourselves
553 spin_unlock_irqrestore(&ws->wait.lock, flags); 555 * from the wait queue to ensure that all wake ups make forward
554 } 556 * progress. It's possible that the waker already deleted the entry
557 * between the !list_empty_careful() check and us grabbing the lock, but
558 * list_del_init() is okay with that.
559 */
560 if (nr >= 0 && !list_empty_careful(&wait->entry)) {
561 ws = khd->domain_ws[sched_domain];
562 spin_lock_irq(&ws->wait.lock);
563 list_del_init(&wait->entry);
564 spin_unlock_irq(&ws->wait.lock);
555 } 565 }
566
556 return nr; 567 return nr;
557} 568}
558 569
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 415a54ced4d6..444a387df219 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -1138,12 +1138,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
1138 if (!af_alg_readable(sk)) 1138 if (!af_alg_readable(sk))
1139 break; 1139 break;
1140 1140
1141 if (!ctx->used) {
1142 err = af_alg_wait_for_data(sk, flags);
1143 if (err)
1144 return err;
1145 }
1146
1147 seglen = min_t(size_t, (maxsize - len), 1141 seglen = min_t(size_t, (maxsize - len),
1148 msg_data_left(msg)); 1142 msg_data_left(msg));
1149 1143
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 48b34e9c6834..ddcc45f77edd 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
111 size_t usedpages = 0; /* [in] RX bufs to be used from user */ 111 size_t usedpages = 0; /* [in] RX bufs to be used from user */
112 size_t processed = 0; /* [in] TX bufs to be consumed */ 112 size_t processed = 0; /* [in] TX bufs to be consumed */
113 113
114 if (!ctx->used) {
115 err = af_alg_wait_for_data(sk, flags);
116 if (err)
117 return err;
118 }
119
114 /* 120 /*
115 * Data length provided by caller via sendmsg/sendpage that has not 121 * Data length provided by caller via sendmsg/sendpage that has not
116 * yet been processed. 122 * yet been processed.
@@ -285,6 +291,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
285 /* AIO operation */ 291 /* AIO operation */
286 sock_hold(sk); 292 sock_hold(sk);
287 areq->iocb = msg->msg_iocb; 293 areq->iocb = msg->msg_iocb;
294
295 /* Remember output size that will be generated. */
296 areq->outlen = outlen;
297
288 aead_request_set_callback(&areq->cra_u.aead_req, 298 aead_request_set_callback(&areq->cra_u.aead_req,
289 CRYPTO_TFM_REQ_MAY_BACKLOG, 299 CRYPTO_TFM_REQ_MAY_BACKLOG,
290 af_alg_async_cb, areq); 300 af_alg_async_cb, areq);
@@ -292,12 +302,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
292 crypto_aead_decrypt(&areq->cra_u.aead_req); 302 crypto_aead_decrypt(&areq->cra_u.aead_req);
293 303
294 /* AIO operation in progress */ 304 /* AIO operation in progress */
295 if (err == -EINPROGRESS || err == -EBUSY) { 305 if (err == -EINPROGRESS || err == -EBUSY)
296 /* Remember output size that will be generated. */
297 areq->outlen = outlen;
298
299 return -EIOCBQUEUED; 306 return -EIOCBQUEUED;
300 }
301 307
302 sock_put(sk); 308 sock_put(sk);
303 } else { 309 } else {
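
Moving the areq->outlen assignment ahead of the decrypt call matters because the AIO completion callback may run before crypto_aead_decrypt() returns; anything the callback reads has to be written before submission. A small threaded sketch of that ordering rule (generic, built with -pthread, not the af_alg code):

#include <pthread.h>
#include <stdio.h>

struct areq {
	size_t outlen;                        /* consumed by the completion */
	void (*complete)(struct areq *);
};

static void completion(struct areq *req)
{
	/* May run at any point after submission, possibly before the submitter's
	 * next statement -- it can only trust fields filled in beforehand. */
	printf("completion sees outlen=%zu\n", req->outlen);
}

static void *async_worker(void *arg)
{
	struct areq *req = arg;
	req->complete(req);
	return NULL;
}

int main(void)
{
	struct areq req = { .complete = completion };
	pthread_t t;

	req.outlen = 4096;                            /* record the result size first ... */
	pthread_create(&t, NULL, async_worker, &req); /* ... then "submit" */
	pthread_join(&t, NULL);
	return 0;
}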
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 30cff827dd8f..baef9bfccdda 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
72 int err = 0; 72 int err = 0;
73 size_t len = 0; 73 size_t len = 0;
74 74
75 if (!ctx->used) {
76 err = af_alg_wait_for_data(sk, flags);
77 if (err)
78 return err;
79 }
80
75 /* Allocate cipher request for current operation. */ 81 /* Allocate cipher request for current operation. */
76 areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) + 82 areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
77 crypto_skcipher_reqsize(tfm)); 83 crypto_skcipher_reqsize(tfm));
@@ -119,6 +125,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
119 /* AIO operation */ 125 /* AIO operation */
120 sock_hold(sk); 126 sock_hold(sk);
121 areq->iocb = msg->msg_iocb; 127 areq->iocb = msg->msg_iocb;
128
129 /* Remember output size that will be generated. */
130 areq->outlen = len;
131
122 skcipher_request_set_callback(&areq->cra_u.skcipher_req, 132 skcipher_request_set_callback(&areq->cra_u.skcipher_req,
123 CRYPTO_TFM_REQ_MAY_SLEEP, 133 CRYPTO_TFM_REQ_MAY_SLEEP,
124 af_alg_async_cb, areq); 134 af_alg_async_cb, areq);
@@ -127,12 +137,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
127 crypto_skcipher_decrypt(&areq->cra_u.skcipher_req); 137 crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
128 138
129 /* AIO operation in progress */ 139 /* AIO operation in progress */
130 if (err == -EINPROGRESS || err == -EBUSY) { 140 if (err == -EINPROGRESS || err == -EBUSY)
131 /* Remember output size that will be generated. */
132 areq->outlen = len;
133
134 return -EIOCBQUEUED; 141 return -EIOCBQUEUED;
135 }
136 142
137 sock_put(sk); 143 sock_put(sk);
138 } else { 144 } else {
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
index 4e6472658852..eca04d3729b3 100644
--- a/crypto/mcryptd.c
+++ b/crypto/mcryptd.c
@@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue,
81 pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); 81 pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
82 crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); 82 crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
83 INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); 83 INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
84 spin_lock_init(&cpu_queue->q_lock);
84 } 85 }
85 return 0; 86 return 0;
86} 87}
@@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
104 int cpu, err; 105 int cpu, err;
105 struct mcryptd_cpu_queue *cpu_queue; 106 struct mcryptd_cpu_queue *cpu_queue;
106 107
107 cpu = get_cpu(); 108 cpu_queue = raw_cpu_ptr(queue->cpu_queue);
108 cpu_queue = this_cpu_ptr(queue->cpu_queue); 109 spin_lock(&cpu_queue->q_lock);
109 rctx->tag.cpu = cpu; 110 cpu = smp_processor_id();
111 rctx->tag.cpu = smp_processor_id();
110 112
111 err = crypto_enqueue_request(&cpu_queue->queue, request); 113 err = crypto_enqueue_request(&cpu_queue->queue, request);
112 pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", 114 pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
113 cpu, cpu_queue, request); 115 cpu, cpu_queue, request);
116 spin_unlock(&cpu_queue->q_lock);
114 queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); 117 queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
115 put_cpu();
116 118
117 return err; 119 return err;
118} 120}
@@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct work_struct *work)
161 cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); 163 cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
162 i = 0; 164 i = 0;
163 while (i < MCRYPTD_BATCH || single_task_running()) { 165 while (i < MCRYPTD_BATCH || single_task_running()) {
164 /* 166
165 * preempt_disable/enable is used to prevent 167 spin_lock_bh(&cpu_queue->q_lock);
166 * being preempted by mcryptd_enqueue_request()
167 */
168 local_bh_disable();
169 preempt_disable();
170 backlog = crypto_get_backlog(&cpu_queue->queue); 168 backlog = crypto_get_backlog(&cpu_queue->queue);
171 req = crypto_dequeue_request(&cpu_queue->queue); 169 req = crypto_dequeue_request(&cpu_queue->queue);
172 preempt_enable(); 170 spin_unlock_bh(&cpu_queue->q_lock);
173 local_bh_enable();
174 171
175 if (!req) { 172 if (!req) {
176 mcryptd_opportunistic_flush(); 173 mcryptd_opportunistic_flush();
@@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct work_struct *work)
185 ++i; 182 ++i;
186 } 183 }
187 if (cpu_queue->queue.qlen) 184 if (cpu_queue->queue.qlen)
188 queue_work(kcrypto_wq, &cpu_queue->work); 185 queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
189} 186}
190 187
191void mcryptd_flusher(struct work_struct *__work) 188void mcryptd_flusher(struct work_struct *__work)
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 778e0ff42bfa..11af5fd6a443 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
449 449
450 walk->total = req->cryptlen; 450 walk->total = req->cryptlen;
451 walk->nbytes = 0; 451 walk->nbytes = 0;
452 walk->iv = req->iv;
453 walk->oiv = req->iv;
452 454
453 if (unlikely(!walk->total)) 455 if (unlikely(!walk->total))
454 return 0; 456 return 0;
@@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
456 scatterwalk_start(&walk->in, req->src); 458 scatterwalk_start(&walk->in, req->src);
457 scatterwalk_start(&walk->out, req->dst); 459 scatterwalk_start(&walk->out, req->dst);
458 460
459 walk->iv = req->iv;
460 walk->oiv = req->iv;
461
462 walk->flags &= ~SKCIPHER_WALK_SLEEP; 461 walk->flags &= ~SKCIPHER_WALK_SLEEP;
463 walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 462 walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
464 SKCIPHER_WALK_SLEEP : 0; 463 SKCIPHER_WALK_SLEEP : 0;
@@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
510 int err; 509 int err;
511 510
512 walk->nbytes = 0; 511 walk->nbytes = 0;
512 walk->iv = req->iv;
513 walk->oiv = req->iv;
513 514
514 if (unlikely(!walk->total)) 515 if (unlikely(!walk->total))
515 return 0; 516 return 0;
@@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
525 scatterwalk_done(&walk->in, 0, walk->total); 526 scatterwalk_done(&walk->in, 0, walk->total);
526 scatterwalk_done(&walk->out, 0, walk->total); 527 scatterwalk_done(&walk->out, 0, walk->total);
527 528
528 walk->iv = req->iv;
529 walk->oiv = req->iv;
530
531 if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) 529 if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
532 walk->flags |= SKCIPHER_WALK_SLEEP; 530 walk->flags |= SKCIPHER_WALK_SLEEP;
533 else 531 else
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 6742f6c68034..9bff853e85f3 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -1007,7 +1007,7 @@ skip:
1007 /* The record may be cleared by others, try read next record */ 1007 /* The record may be cleared by others, try read next record */
1008 if (len == -ENOENT) 1008 if (len == -ENOENT)
1009 goto skip; 1009 goto skip;
1010 else if (len < sizeof(*rcd)) { 1010 else if (len < 0 || len < sizeof(*rcd)) {
1011 rc = -EIO; 1011 rc = -EIO;
1012 goto out; 1012 goto out;
1013 } 1013 }
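
The erst change guards against a signed/unsigned trap: len can hold a negative error code, while sizeof(*rcd) is an unsigned size_t, so "len < sizeof(*rcd)" alone converts the negative value to a huge unsigned one and the length check passes. A short demonstration (record type is invented):

#include <stdio.h>
#include <sys/types.h>

struct rcd { char hdr[64]; };

int main(void)
{
	ssize_t len = -5;    /* an error code that ended up in a length variable */

	/* Usual arithmetic conversions turn -5 into a value near SIZE_MAX,
	 * so the "too short" check quietly lets negative lengths through. */
	printf("len < sizeof(rcd)            -> %d\n", len < sizeof(struct rcd));

	/* Checking for negative values first, as the patch does, closes the hole. */
	printf("len < 0 || len < sizeof(rcd) -> %d\n",
	       len < 0 || (size_t)len < sizeof(struct rcd));
	return 0;
}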
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 30e84cc600ae..06ea4749ebd9 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1171,7 +1171,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
1171 struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); 1171 struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
1172 struct cpc_register_resource *desired_reg; 1172 struct cpc_register_resource *desired_reg;
1173 int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); 1173 int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
1174 struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id]; 1174 struct cppc_pcc_data *pcc_ss_data;
1175 int ret = 0; 1175 int ret = 0;
1176 1176
1177 if (!cpc_desc || pcc_ss_id < 0) { 1177 if (!cpc_desc || pcc_ss_id < 0) {
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index ff2580e7611d..abeb4df4f22e 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
1670 dev_name(&adev_dimm->dev)); 1670 dev_name(&adev_dimm->dev));
1671 return -ENXIO; 1671 return -ENXIO;
1672 } 1672 }
1673 /*
1674 * Record nfit_mem for the notification path to track back to
1675 * the nfit sysfs attributes for this dimm device object.
1676 */
1677 dev_set_drvdata(&adev_dimm->dev, nfit_mem);
1673 1678
1674 /* 1679 /*
1675 * Until standardization materializes we need to consider 4 1680 * Until standardization materializes we need to consider 4
@@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data)
1752 sysfs_put(nfit_mem->flags_attr); 1757 sysfs_put(nfit_mem->flags_attr);
1753 nfit_mem->flags_attr = NULL; 1758 nfit_mem->flags_attr = NULL;
1754 } 1759 }
1755 if (adev_dimm) 1760 if (adev_dimm) {
1756 acpi_remove_notify_handler(adev_dimm->handle, 1761 acpi_remove_notify_handler(adev_dimm->handle,
1757 ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify); 1762 ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
1763 dev_set_drvdata(&adev_dimm->dev, NULL);
1764 }
1758 } 1765 }
1759 mutex_unlock(&acpi_desc->init_mutex); 1766 mutex_unlock(&acpi_desc->init_mutex);
1760} 1767}
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index ccb9975a97fa..ad0477ae820f 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps)
35struct nullb_cmd { 35struct nullb_cmd {
36 struct list_head list; 36 struct list_head list;
37 struct llist_node ll_list; 37 struct llist_node ll_list;
38 call_single_data_t csd; 38 struct __call_single_data csd;
39 struct request *rq; 39 struct request *rq;
40 struct bio *bio; 40 struct bio *bio;
41 unsigned int tag; 41 unsigned int tag;
42 blk_status_t error;
42 struct nullb_queue *nq; 43 struct nullb_queue *nq;
43 struct hrtimer timer; 44 struct hrtimer timer;
44 blk_status_t error;
45}; 45};
46 46
47struct nullb_queue { 47struct nullb_queue {
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 647d056df88c..8a1860a36c77 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1564,6 +1564,9 @@ static void clk_change_rate(struct clk_core *core)
1564 best_parent_rate = core->parent->rate; 1564 best_parent_rate = core->parent->rate;
1565 } 1565 }
1566 1566
1567 if (clk_pm_runtime_get(core))
1568 return;
1569
1567 if (core->flags & CLK_SET_RATE_UNGATE) { 1570 if (core->flags & CLK_SET_RATE_UNGATE) {
1568 unsigned long flags; 1571 unsigned long flags;
1569 1572
@@ -1634,6 +1637,8 @@ static void clk_change_rate(struct clk_core *core)
1634 /* handle the new child who might not be in core->children yet */ 1637 /* handle the new child who might not be in core->children yet */
1635 if (core->new_child) 1638 if (core->new_child)
1636 clk_change_rate(core->new_child); 1639 clk_change_rate(core->new_child);
1640
1641 clk_pm_runtime_put(core);
1637} 1642}
1638 1643
1639static int clk_core_set_rate_nolock(struct clk_core *core, 1644static int clk_core_set_rate_nolock(struct clk_core *core,
diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c
index a1a634253d6f..f00d8758ba24 100644
--- a/drivers/clk/sunxi/clk-sun9i-mmc.c
+++ b/drivers/clk/sunxi/clk-sun9i-mmc.c
@@ -16,6 +16,7 @@
16 16
17#include <linux/clk.h> 17#include <linux/clk.h>
18#include <linux/clk-provider.h> 18#include <linux/clk-provider.h>
19#include <linux/delay.h>
19#include <linux/init.h> 20#include <linux/init.h>
20#include <linux/of.h> 21#include <linux/of.h>
21#include <linux/of_device.h> 22#include <linux/of_device.h>
@@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev,
83 return 0; 84 return 0;
84} 85}
85 86
87static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev,
88 unsigned long id)
89{
90 sun9i_mmc_reset_assert(rcdev, id);
91 udelay(10);
92 sun9i_mmc_reset_deassert(rcdev, id);
93
94 return 0;
95}
96
86static const struct reset_control_ops sun9i_mmc_reset_ops = { 97static const struct reset_control_ops sun9i_mmc_reset_ops = {
87 .assert = sun9i_mmc_reset_assert, 98 .assert = sun9i_mmc_reset_assert,
88 .deassert = sun9i_mmc_reset_deassert, 99 .deassert = sun9i_mmc_reset_deassert,
100 .reset = sun9i_mmc_reset_reset,
89}; 101};
90 102
91static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev) 103static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 58d4f4e1ad6a..ca38229b045a 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -22,6 +22,8 @@
22 22
23#include "cpufreq_governor.h" 23#include "cpufreq_governor.h"
24 24
25#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL (2 * TICK_NSEC / NSEC_PER_USEC)
26
25static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); 27static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs);
26 28
27static DEFINE_MUTEX(gov_dbs_data_mutex); 29static DEFINE_MUTEX(gov_dbs_data_mutex);
@@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
47{ 49{
48 struct dbs_data *dbs_data = to_dbs_data(attr_set); 50 struct dbs_data *dbs_data = to_dbs_data(attr_set);
49 struct policy_dbs_info *policy_dbs; 51 struct policy_dbs_info *policy_dbs;
52 unsigned int sampling_interval;
50 int ret; 53 int ret;
51 ret = sscanf(buf, "%u", &dbs_data->sampling_rate); 54
52 if (ret != 1) 55 ret = sscanf(buf, "%u", &sampling_interval);
56 if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL)
53 return -EINVAL; 57 return -EINVAL;
54 58
59 dbs_data->sampling_rate = sampling_interval;
60
55 /* 61 /*
56 * We are operating under dbs_data->mutex and so the list and its 62 * We are operating under dbs_data->mutex and so the list and its
57 * entries can't be freed concurrently. 63 * entries can't be freed concurrently.
@@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
430 if (ret) 436 if (ret)
431 goto free_policy_dbs_info; 437 goto free_policy_dbs_info;
432 438
433 dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy); 439 /*
440 * The sampling interval should not be less than the transition latency
441 * of the CPU and it also cannot be too small for dbs_update() to work
442 * correctly.
443 */
444 dbs_data->sampling_rate = max_t(unsigned int,
445 CPUFREQ_DBS_MIN_SAMPLING_INTERVAL,
446 cpufreq_policy_transition_delay_us(policy));
434 447
435 if (!have_governor_per_policy()) 448 if (!have_governor_per_policy())
436 gov->gdbs_data = dbs_data; 449 gov->gdbs_data = dbs_data;
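
The new lower bound is simply two scheduler ticks expressed in microseconds, so the governor never samples faster than the load statistics can change even when the driver reports a tiny transition delay. With an assumed HZ the numbers work out as below (TICK_NSEC is approximated as NSEC_PER_SEC/HZ for the sketch):

#include <stdio.h>

#define NSEC_PER_SEC   1000000000UL
#define NSEC_PER_USEC  1000UL
#define HZ             250UL                      /* assumed CONFIG_HZ */
#define TICK_NSEC      (NSEC_PER_SEC / HZ)

/* Mirrors CPUFREQ_DBS_MIN_SAMPLING_INTERVAL: two ticks, in microseconds. */
#define MIN_SAMPLING_US  (2 * TICK_NSEC / NSEC_PER_USEC)

static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

int main(void)
{
	unsigned long transition_delay_us = 500;      /* e.g. a fast-switching driver */

	printf("minimum sampling interval: %lu us\n", MIN_SAMPLING_US);
	printf("effective sampling rate:   %lu us\n",
	       max_ul(MIN_SAMPLING_US, transition_delay_us));
	return 0;
}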
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 628fe899cb48..d9b2c2de49c4 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev)
226 val >>= OCOTP_CFG3_SPEED_SHIFT; 226 val >>= OCOTP_CFG3_SPEED_SHIFT;
227 val &= 0x3; 227 val &= 0x3;
228 228
229 if ((val != OCOTP_CFG3_SPEED_1P2GHZ) &&
230 of_machine_is_compatible("fsl,imx6q"))
231 if (dev_pm_opp_disable(dev, 1200000000))
232 dev_warn(dev, "failed to disable 1.2GHz OPP\n");
233 if (val < OCOTP_CFG3_SPEED_996MHZ) 229 if (val < OCOTP_CFG3_SPEED_996MHZ)
234 if (dev_pm_opp_disable(dev, 996000000)) 230 if (dev_pm_opp_disable(dev, 996000000))
235 dev_warn(dev, "failed to disable 996MHz OPP\n"); 231 dev_warn(dev, "failed to disable 996MHz OPP\n");
236 if (of_machine_is_compatible("fsl,imx6q")) { 232
233 if (of_machine_is_compatible("fsl,imx6q") ||
234 of_machine_is_compatible("fsl,imx6qp")) {
237 if (val != OCOTP_CFG3_SPEED_852MHZ) 235 if (val != OCOTP_CFG3_SPEED_852MHZ)
238 if (dev_pm_opp_disable(dev, 852000000)) 236 if (dev_pm_opp_disable(dev, 852000000))
239 dev_warn(dev, "failed to disable 852MHz OPP\n"); 237 dev_warn(dev, "failed to disable 852MHz OPP\n");
238 if (val != OCOTP_CFG3_SPEED_1P2GHZ)
239 if (dev_pm_opp_disable(dev, 1200000000))
240 dev_warn(dev, "failed to disable 1.2GHz OPP\n");
240 } 241 }
241 iounmap(base); 242 iounmap(base);
242put_node: 243put_node:
diff --git a/drivers/gpio/gpio-reg.c b/drivers/gpio/gpio-reg.c
index 23e771dba4c1..e85903eddc68 100644
--- a/drivers/gpio/gpio-reg.c
+++ b/drivers/gpio/gpio-reg.c
@@ -103,8 +103,8 @@ static int gpio_reg_to_irq(struct gpio_chip *gc, unsigned offset)
103 struct gpio_reg *r = to_gpio_reg(gc); 103 struct gpio_reg *r = to_gpio_reg(gc);
104 int irq = r->irqs[offset]; 104 int irq = r->irqs[offset];
105 105
106 if (irq >= 0 && r->irq.domain) 106 if (irq >= 0 && r->irqdomain)
107 irq = irq_find_mapping(r->irq.domain, irq); 107 irq = irq_find_mapping(r->irqdomain, irq);
108 108
109 return irq; 109 return irq;
110} 110}
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index eb4528c87c0b..d6f3d9ee1350 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip)
1074 } 1074 }
1075 1075
1076 if (!chip->names) 1076 if (!chip->names)
1077 devprop_gpiochip_set_names(chip); 1077 devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent));
1078 1078
1079 acpi_gpiochip_request_regions(acpi_gpio); 1079 acpi_gpiochip_request_regions(acpi_gpio);
1080 acpi_gpiochip_scan_gpios(acpi_gpio); 1080 acpi_gpiochip_scan_gpios(acpi_gpio);
diff --git a/drivers/gpio/gpiolib-devprop.c b/drivers/gpio/gpiolib-devprop.c
index 27f383bda7d9..f748aa3e77f7 100644
--- a/drivers/gpio/gpiolib-devprop.c
+++ b/drivers/gpio/gpiolib-devprop.c
@@ -19,30 +19,27 @@
19/** 19/**
20 * devprop_gpiochip_set_names - Set GPIO line names using device properties 20 * devprop_gpiochip_set_names - Set GPIO line names using device properties
21 * @chip: GPIO chip whose lines should be named, if possible 21 * @chip: GPIO chip whose lines should be named, if possible
22 * @fwnode: Property Node containing the gpio-line-names property
22 * 23 *
23 * Looks for device property "gpio-line-names" and if it exists assigns 24 * Looks for device property "gpio-line-names" and if it exists assigns
24 * GPIO line names for the chip. The memory allocated for the assigned 25 * GPIO line names for the chip. The memory allocated for the assigned
25 * names belong to the underlying firmware node and should not be released 26 * names belong to the underlying firmware node and should not be released
26 * by the caller. 27 * by the caller.
27 */ 28 */
28void devprop_gpiochip_set_names(struct gpio_chip *chip) 29void devprop_gpiochip_set_names(struct gpio_chip *chip,
30 const struct fwnode_handle *fwnode)
29{ 31{
30 struct gpio_device *gdev = chip->gpiodev; 32 struct gpio_device *gdev = chip->gpiodev;
31 const char **names; 33 const char **names;
32 int ret, i; 34 int ret, i;
33 35
34 if (!chip->parent) { 36 ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
35 dev_warn(&gdev->dev, "GPIO chip parent is NULL\n");
36 return;
37 }
38
39 ret = device_property_read_string_array(chip->parent, "gpio-line-names",
40 NULL, 0); 37 NULL, 0);
41 if (ret < 0) 38 if (ret < 0)
42 return; 39 return;
43 40
44 if (ret != gdev->ngpio) { 41 if (ret != gdev->ngpio) {
45 dev_warn(chip->parent, 42 dev_warn(&gdev->dev,
46 "names %d do not match number of GPIOs %d\n", ret, 43 "names %d do not match number of GPIOs %d\n", ret,
47 gdev->ngpio); 44 gdev->ngpio);
48 return; 45 return;
@@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip)
52 if (!names) 49 if (!names)
53 return; 50 return;
54 51
55 ret = device_property_read_string_array(chip->parent, "gpio-line-names", 52 ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
56 names, gdev->ngpio); 53 names, gdev->ngpio);
57 if (ret < 0) { 54 if (ret < 0) {
58 dev_warn(chip->parent, "failed to read GPIO line names\n"); 55 dev_warn(&gdev->dev, "failed to read GPIO line names\n");
59 kfree(names); 56 kfree(names);
60 return; 57 return;
61 } 58 }
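
Note on the hunk above: devprop_gpiochip_set_names() now receives the firmware node explicitly instead of deriving it from chip->parent, so the OF and ACPI callers can each pass the node that actually carries "gpio-line-names", and diagnostics go to the gpio_device rather than a possibly missing parent. A minimal sketch of the count-then-read pattern the helper uses (the function name here is made up, not part of the patch):

#include <linux/property.h>
#include <linux/slab.h>

/* Count-then-read sketch for "gpio-line-names" (illustrative caller). */
static int example_read_line_names(const struct fwnode_handle *fwnode,
				   const char ***names_out)
{
	const char **names;
	int count;

	/* Pass 1: a NULL buffer just returns the number of strings. */
	count = fwnode_property_read_string_array(fwnode, "gpio-line-names",
						  NULL, 0);
	if (count <= 0)
		return count;

	names = kcalloc(count, sizeof(*names), GFP_KERNEL);
	if (!names)
		return -ENOMEM;

	/* Pass 2: fill the array; the strings remain owned by the fwnode. */
	count = fwnode_property_read_string_array(fwnode, "gpio-line-names",
						  names, count);
	if (count < 0)
		kfree(names);
	else
		*names_out = names;

	return count;
}
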
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index e0d59e61b52f..72a0695d2ac3 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip)
493 493
494 /* If the chip defines names itself, these take precedence */ 494 /* If the chip defines names itself, these take precedence */
495 if (!chip->names) 495 if (!chip->names)
496 devprop_gpiochip_set_names(chip); 496 devprop_gpiochip_set_names(chip,
497 of_fwnode_handle(chip->of_node));
497 498
498 of_node_get(chip->of_node); 499 of_node_get(chip->of_node);
499 500
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index af48322839c3..6c44d1652139 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -228,7 +228,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc)
228 return desc - &desc->gdev->descs[0]; 228 return desc - &desc->gdev->descs[0];
229} 229}
230 230
231void devprop_gpiochip_set_names(struct gpio_chip *chip); 231void devprop_gpiochip_set_names(struct gpio_chip *chip,
232 const struct fwnode_handle *fwnode);
232 233
233/* With descriptor prefix */ 234/* With descriptor prefix */
234 235
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index da43813d67a4..5aeb5f8816f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2467,7 +2467,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2467 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 2467 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2468 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 2468 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2469 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 2469 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2470 PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */ 2470 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2471 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 2471 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2472 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 2472 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2473 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 2473 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f71fe6d2ddda..bb5fa895fb64 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2336,7 +2336,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
2336 const struct dm_connector_state *dm_state) 2336 const struct dm_connector_state *dm_state)
2337{ 2337{
2338 struct drm_display_mode *preferred_mode = NULL; 2338 struct drm_display_mode *preferred_mode = NULL;
2339 const struct drm_connector *drm_connector; 2339 struct drm_connector *drm_connector;
2340 struct dc_stream_state *stream = NULL; 2340 struct dc_stream_state *stream = NULL;
2341 struct drm_display_mode mode = *drm_mode; 2341 struct drm_display_mode mode = *drm_mode;
2342 bool native_mode_found = false; 2342 bool native_mode_found = false;
@@ -2355,11 +2355,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
2355 2355
2356 if (!aconnector->dc_sink) { 2356 if (!aconnector->dc_sink) {
2357 /* 2357 /*
2358 * Exclude MST from creating fake_sink 2358 * Create dc_sink when necessary to MST
2359 * TODO: need to enable MST into fake_sink feature 2359 * Don't apply fake_sink to MST
2360 */ 2360 */
2361 if (aconnector->mst_port) 2361 if (aconnector->mst_port) {
2362 goto stream_create_fail; 2362 dm_dp_mst_dc_sink_create(drm_connector);
2363 goto mst_dc_sink_create_done;
2364 }
2363 2365
2364 if (create_fake_sink(aconnector)) 2366 if (create_fake_sink(aconnector))
2365 goto stream_create_fail; 2367 goto stream_create_fail;
@@ -2410,6 +2412,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
2410stream_create_fail: 2412stream_create_fail:
2411dm_state_null: 2413dm_state_null:
2412drm_connector_null: 2414drm_connector_null:
2415mst_dc_sink_create_done:
2413 return stream; 2416 return stream;
2414} 2417}
2415 2418
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 117521c6a6ed..0230250a1164 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -189,6 +189,8 @@ struct amdgpu_dm_connector {
189 struct mutex hpd_lock; 189 struct mutex hpd_lock;
190 190
191 bool fake_enable; 191 bool fake_enable;
192
193 bool mst_connected;
192}; 194};
193 195
194#define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) 196#define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index f8efb98b1fa7..638c2c2b5cd7 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -185,6 +185,42 @@ static int dm_connector_update_modes(struct drm_connector *connector,
185 return ret; 185 return ret;
186} 186}
187 187
188void dm_dp_mst_dc_sink_create(struct drm_connector *connector)
189{
190 struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
191 struct edid *edid;
192 struct dc_sink *dc_sink;
193 struct dc_sink_init_data init_params = {
194 .link = aconnector->dc_link,
195 .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
196
197 edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port);
198
199 if (!edid) {
200 drm_mode_connector_update_edid_property(
201 &aconnector->base,
202 NULL);
203 return;
204 }
205
206 aconnector->edid = edid;
207
208 dc_sink = dc_link_add_remote_sink(
209 aconnector->dc_link,
210 (uint8_t *)aconnector->edid,
211 (aconnector->edid->extensions + 1) * EDID_LENGTH,
212 &init_params);
213
214 dc_sink->priv = aconnector;
215 aconnector->dc_sink = dc_sink;
216
217 amdgpu_dm_add_sink_to_freesync_module(
218 connector, aconnector->edid);
219
220 drm_mode_connector_update_edid_property(
221 &aconnector->base, aconnector->edid);
222}
223
188static int dm_dp_mst_get_modes(struct drm_connector *connector) 224static int dm_dp_mst_get_modes(struct drm_connector *connector)
189{ 225{
190 struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); 226 struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
@@ -311,6 +347,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
311 drm_mode_connector_set_path_property(connector, pathprop); 347 drm_mode_connector_set_path_property(connector, pathprop);
312 348
313 drm_connector_list_iter_end(&conn_iter); 349 drm_connector_list_iter_end(&conn_iter);
350 aconnector->mst_connected = true;
314 return &aconnector->base; 351 return &aconnector->base;
315 } 352 }
316 } 353 }
@@ -363,6 +400,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
363 */ 400 */
364 amdgpu_dm_connector_funcs_reset(connector); 401 amdgpu_dm_connector_funcs_reset(connector);
365 402
403 aconnector->mst_connected = true;
404
366 DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n", 405 DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n",
367 aconnector, connector->base.id, aconnector->mst_port); 406 aconnector, connector->base.id, aconnector->mst_port);
368 407
@@ -394,6 +433,8 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
394 drm_mode_connector_update_edid_property( 433 drm_mode_connector_update_edid_property(
395 &aconnector->base, 434 &aconnector->base,
396 NULL); 435 NULL);
436
437 aconnector->mst_connected = false;
397} 438}
398 439
399static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) 440static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
@@ -404,10 +445,18 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
404 drm_kms_helper_hotplug_event(dev); 445 drm_kms_helper_hotplug_event(dev);
405} 446}
406 447
448static void dm_dp_mst_link_status_reset(struct drm_connector *connector)
449{
450 mutex_lock(&connector->dev->mode_config.mutex);
451 drm_mode_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD);
452 mutex_unlock(&connector->dev->mode_config.mutex);
453}
454
407static void dm_dp_mst_register_connector(struct drm_connector *connector) 455static void dm_dp_mst_register_connector(struct drm_connector *connector)
408{ 456{
409 struct drm_device *dev = connector->dev; 457 struct drm_device *dev = connector->dev;
410 struct amdgpu_device *adev = dev->dev_private; 458 struct amdgpu_device *adev = dev->dev_private;
459 struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
411 460
412 if (adev->mode_info.rfbdev) 461 if (adev->mode_info.rfbdev)
413 drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector); 462 drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector);
@@ -416,6 +465,8 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector)
416 465
417 drm_connector_register(connector); 466 drm_connector_register(connector);
418 467
468 if (aconnector->mst_connected)
469 dm_dp_mst_link_status_reset(connector);
419} 470}
420 471
421static const struct drm_dp_mst_topology_cbs dm_mst_cbs = { 472static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
index 2da851b40042..8cf51da26657 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
@@ -31,5 +31,6 @@ struct amdgpu_dm_connector;
31 31
32void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, 32void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
33 struct amdgpu_dm_connector *aconnector); 33 struct amdgpu_dm_connector *aconnector);
34void dm_dp_mst_dc_sink_create(struct drm_connector *connector);
34 35
35#endif 36#endif
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
index 3dce35e66b09..b142629a1058 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
@@ -900,6 +900,15 @@ bool dcn_validate_bandwidth(
900 v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps; 900 v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps;
901 v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c; 901 v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c;
902 v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c; 902 v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c;
903 /*
904 * Spreadsheet doesn't handle taps_c is one properly,
905 * need to force Chroma to always be scaled to pass
906 * bandwidth validation.
907 */
908 if (v->override_hta_pschroma[input_idx] == 1)
909 v->override_hta_pschroma[input_idx] = 2;
910 if (v->override_vta_pschroma[input_idx] == 1)
911 v->override_vta_pschroma[input_idx] = 2;
903 v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor; 912 v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor;
904 } 913 }
905 if (v->is_line_buffer_bpp_fixed == dcn_bw_yes) 914 if (v->is_line_buffer_bpp_fixed == dcn_bw_yes)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index e27ed4a45265..42a111b9505d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -1801,7 +1801,7 @@ static void disable_link(struct dc_link *link, enum signal_type signal)
1801 link->link_enc->funcs->disable_output(link->link_enc, signal, link); 1801 link->link_enc->funcs->disable_output(link->link_enc, signal, link);
1802} 1802}
1803 1803
1804bool dp_active_dongle_validate_timing( 1804static bool dp_active_dongle_validate_timing(
1805 const struct dc_crtc_timing *timing, 1805 const struct dc_crtc_timing *timing,
1806 const struct dc_dongle_caps *dongle_caps) 1806 const struct dc_dongle_caps *dongle_caps)
1807{ 1807{
@@ -1833,6 +1833,8 @@ bool dp_active_dongle_validate_timing(
1833 /* Check Color Depth and Pixel Clock */ 1833 /* Check Color Depth and Pixel Clock */
1834 if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) 1834 if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
1835 required_pix_clk /= 2; 1835 required_pix_clk /= 2;
1836 else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
1837 required_pix_clk = required_pix_clk * 2 / 3;
1836 1838
1837 switch (timing->display_color_depth) { 1839 switch (timing->display_color_depth) {
1838 case COLOR_DEPTH_666: 1840 case COLOR_DEPTH_666:
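
Note on the hunk above: the added branch scales the active-dongle bandwidth check for YCbCr 4:2:2, presumably because 4:2:2 carries two samples per pixel instead of the three of 4:4:4 (the existing 4:2:0 case already halves the requirement). A rough worked example, with illustrative values rather than anything taken from the driver:

/* Rough bandwidth arithmetic for the new 4:2:2 branch (values illustrative). */
static unsigned int example_required_pix_clk_khz(void)
{
	unsigned int required_pix_clk = 297000;	/* 4K30 timing, kHz */

	/* Two thirds of the 4:4:4 rate has to fit through the dongle:
	 * 297000 * 2 / 3 = 198000 kHz, which a dongle advertising a
	 * 300 MHz maximum pixel clock would pass.
	 */
	required_pix_clk = required_pix_clk * 2 / 3;
	return required_pix_clk;
}
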
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 07ff8d2faf3f..d844fadcd56f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -2866,16 +2866,19 @@ static void dce110_apply_ctx_for_surface(
2866 int num_planes, 2866 int num_planes,
2867 struct dc_state *context) 2867 struct dc_state *context)
2868{ 2868{
2869 int i, be_idx; 2869 int i;
2870 2870
2871 if (num_planes == 0) 2871 if (num_planes == 0)
2872 return; 2872 return;
2873 2873
2874 be_idx = -1;
2875 for (i = 0; i < dc->res_pool->pipe_count; i++) { 2874 for (i = 0; i < dc->res_pool->pipe_count; i++) {
2876 if (stream == context->res_ctx.pipe_ctx[i].stream) { 2875 struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
2877 be_idx = context->res_ctx.pipe_ctx[i].stream_res.tg->inst; 2876 struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
2878 break; 2877
2878 if (stream == pipe_ctx->stream) {
2879 if (!pipe_ctx->top_pipe &&
2880 (pipe_ctx->plane_state || old_pipe_ctx->plane_state))
2881 dc->hwss.pipe_control_lock(dc, pipe_ctx, true);
2879 } 2882 }
2880 } 2883 }
2881 2884
@@ -2895,9 +2898,22 @@ static void dce110_apply_ctx_for_surface(
2895 context->stream_count); 2898 context->stream_count);
2896 2899
2897 dce110_program_front_end_for_pipe(dc, pipe_ctx); 2900 dce110_program_front_end_for_pipe(dc, pipe_ctx);
2901
2902 dc->hwss.update_plane_addr(dc, pipe_ctx);
2903
2898 program_surface_visibility(dc, pipe_ctx); 2904 program_surface_visibility(dc, pipe_ctx);
2899 2905
2900 } 2906 }
2907
2908 for (i = 0; i < dc->res_pool->pipe_count; i++) {
2909 struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
2910 struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
2911
2912 if ((stream == pipe_ctx->stream) &&
2913 (!pipe_ctx->top_pipe) &&
2914 (pipe_ctx->plane_state || old_pipe_ctx->plane_state))
2915 dc->hwss.pipe_control_lock(dc, pipe_ctx, false);
2916 }
2901} 2917}
2902 2918
2903static void dce110_power_down_fe(struct dc *dc, int fe_idx) 2919static void dce110_power_down_fe(struct dc *dc, int fe_idx)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
index 74e7c82bdc76..a9d55d0dd69e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
@@ -159,11 +159,10 @@ bool dpp_get_optimal_number_of_taps(
159 scl_data->taps.h_taps = 1; 159 scl_data->taps.h_taps = 1;
160 if (IDENTITY_RATIO(scl_data->ratios.vert)) 160 if (IDENTITY_RATIO(scl_data->ratios.vert))
161 scl_data->taps.v_taps = 1; 161 scl_data->taps.v_taps = 1;
162 /* 162 if (IDENTITY_RATIO(scl_data->ratios.horz_c))
163 * Spreadsheet doesn't handle taps_c is one properly, 163 scl_data->taps.h_taps_c = 1;
164 * need to force Chroma to always be scaled to pass 164 if (IDENTITY_RATIO(scl_data->ratios.vert_c))
165 * bandwidth validation. 165 scl_data->taps.v_taps_c = 1;
166 */
167 } 166 }
168 167
169 return true; 168 return true;
diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index 59849f02e2ad..1402c0e71b03 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -220,17 +220,6 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
220 220
221 mutex_lock(&dev->mode_config.idr_mutex); 221 mutex_lock(&dev->mode_config.idr_mutex);
222 222
223 /* Insert the new lessee into the tree */
224 id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
225 if (id < 0) {
226 error = id;
227 goto out_lessee;
228 }
229
230 lessee->lessee_id = id;
231 lessee->lessor = drm_master_get(lessor);
232 list_add_tail(&lessee->lessee_list, &lessor->lessees);
233
234 idr_for_each_entry(leases, entry, object) { 223 idr_for_each_entry(leases, entry, object) {
235 error = 0; 224 error = 0;
236 if (!idr_find(&dev->mode_config.crtc_idr, object)) 225 if (!idr_find(&dev->mode_config.crtc_idr, object))
@@ -246,6 +235,17 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
246 } 235 }
247 } 236 }
248 237
238 /* Insert the new lessee into the tree */
239 id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
240 if (id < 0) {
241 error = id;
242 goto out_lessee;
243 }
244
245 lessee->lessee_id = id;
246 lessee->lessor = drm_master_get(lessor);
247 list_add_tail(&lessee->lessee_list, &lessor->lessees);
248
249 /* Move the leases over */ 249 /* Move the leases over */
250 lessee->leases = *leases; 250 lessee->leases = *leases;
251 DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor); 251 DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor);
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index 37a93cdffb4a..2c90519576a3 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -558,11 +558,10 @@ int drm_plane_check_pixel_format(const struct drm_plane *plane, u32 format)
558} 558}
559 559
560/* 560/*
561 * setplane_internal - setplane handler for internal callers 561 * __setplane_internal - setplane handler for internal callers
562 * 562 *
563 * Note that we assume an extra reference has already been taken on fb. If the 563 * This function will take a reference on the new fb for the plane
564 * update fails, this reference will be dropped before return; if it succeeds, 564 * on success.
565 * the previous framebuffer (if any) will be unreferenced instead.
566 * 565 *
567 * src_{x,y,w,h} are provided in 16.16 fixed point format 566 * src_{x,y,w,h} are provided in 16.16 fixed point format
568 */ 567 */
@@ -630,14 +629,12 @@ static int __setplane_internal(struct drm_plane *plane,
630 if (!ret) { 629 if (!ret) {
631 plane->crtc = crtc; 630 plane->crtc = crtc;
632 plane->fb = fb; 631 plane->fb = fb;
633 fb = NULL; 632 drm_framebuffer_get(plane->fb);
634 } else { 633 } else {
635 plane->old_fb = NULL; 634 plane->old_fb = NULL;
636 } 635 }
637 636
638out: 637out:
639 if (fb)
640 drm_framebuffer_put(fb);
641 if (plane->old_fb) 638 if (plane->old_fb)
642 drm_framebuffer_put(plane->old_fb); 639 drm_framebuffer_put(plane->old_fb);
643 plane->old_fb = NULL; 640 plane->old_fb = NULL;
@@ -685,6 +682,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
685 struct drm_plane *plane; 682 struct drm_plane *plane;
686 struct drm_crtc *crtc = NULL; 683 struct drm_crtc *crtc = NULL;
687 struct drm_framebuffer *fb = NULL; 684 struct drm_framebuffer *fb = NULL;
685 int ret;
688 686
689 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 687 if (!drm_core_check_feature(dev, DRIVER_MODESET))
690 return -EINVAL; 688 return -EINVAL;
@@ -717,15 +715,16 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
717 } 715 }
718 } 716 }
719 717
720 /* 718 ret = setplane_internal(plane, crtc, fb,
721 * setplane_internal will take care of deref'ing either the old or new 719 plane_req->crtc_x, plane_req->crtc_y,
722 * framebuffer depending on success. 720 plane_req->crtc_w, plane_req->crtc_h,
723 */ 721 plane_req->src_x, plane_req->src_y,
724 return setplane_internal(plane, crtc, fb, 722 plane_req->src_w, plane_req->src_h);
725 plane_req->crtc_x, plane_req->crtc_y, 723
726 plane_req->crtc_w, plane_req->crtc_h, 724 if (fb)
727 plane_req->src_x, plane_req->src_y, 725 drm_framebuffer_put(fb);
728 plane_req->src_w, plane_req->src_h); 726
727 return ret;
729} 728}
730 729
731static int drm_mode_cursor_universal(struct drm_crtc *crtc, 730static int drm_mode_cursor_universal(struct drm_crtc *crtc,
@@ -788,13 +787,12 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc,
788 src_h = fb->height << 16; 787 src_h = fb->height << 16;
789 } 788 }
790 789
791 /*
792 * setplane_internal will take care of deref'ing either the old or new
793 * framebuffer depending on success.
794 */
795 ret = __setplane_internal(crtc->cursor, crtc, fb, 790 ret = __setplane_internal(crtc->cursor, crtc, fb,
796 crtc_x, crtc_y, crtc_w, crtc_h, 791 crtc_x, crtc_y, crtc_w, crtc_h,
797 0, 0, src_w, src_h, ctx); 792 0, 0, src_w, src_h, ctx);
793
794 if (fb)
795 drm_framebuffer_put(fb);
798 796
799 /* Update successful; save new cursor position, if necessary */ 797 /* Update successful; save new cursor position, if necessary */
800 if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) { 798 if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) {
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index f776fc1cc543..cb4d09c70fd4 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -369,40 +369,26 @@ static const struct file_operations drm_syncobj_file_fops = {
369 .release = drm_syncobj_file_release, 369 .release = drm_syncobj_file_release,
370}; 370};
371 371
372static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj)
373{
374 struct file *file = anon_inode_getfile("syncobj_file",
375 &drm_syncobj_file_fops,
376 syncobj, 0);
377 if (IS_ERR(file))
378 return PTR_ERR(file);
379
380 drm_syncobj_get(syncobj);
381 if (cmpxchg(&syncobj->file, NULL, file)) {
382 /* lost the race */
383 fput(file);
384 }
385
386 return 0;
387}
388
389int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd) 372int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd)
390{ 373{
391 int ret; 374 struct file *file;
392 int fd; 375 int fd;
393 376
394 fd = get_unused_fd_flags(O_CLOEXEC); 377 fd = get_unused_fd_flags(O_CLOEXEC);
395 if (fd < 0) 378 if (fd < 0)
396 return fd; 379 return fd;
397 380
398 if (!syncobj->file) { 381 file = anon_inode_getfile("syncobj_file",
399 ret = drm_syncobj_alloc_file(syncobj); 382 &drm_syncobj_file_fops,
400 if (ret) { 383 syncobj, 0);
401 put_unused_fd(fd); 384 if (IS_ERR(file)) {
402 return ret; 385 put_unused_fd(fd);
403 } 386 return PTR_ERR(file);
404 } 387 }
405 fd_install(fd, syncobj->file); 388
389 drm_syncobj_get(syncobj);
390 fd_install(fd, file);
391
406 *p_fd = fd; 392 *p_fd = fd;
407 return 0; 393 return 0;
408} 394}
@@ -422,31 +408,24 @@ static int drm_syncobj_handle_to_fd(struct drm_file *file_private,
422 return ret; 408 return ret;
423} 409}
424 410
425static struct drm_syncobj *drm_syncobj_fdget(int fd)
426{
427 struct file *file = fget(fd);
428
429 if (!file)
430 return NULL;
431 if (file->f_op != &drm_syncobj_file_fops)
432 goto err;
433
434 return file->private_data;
435err:
436 fput(file);
437 return NULL;
438};
439
440static int drm_syncobj_fd_to_handle(struct drm_file *file_private, 411static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
441 int fd, u32 *handle) 412 int fd, u32 *handle)
442{ 413{
443 struct drm_syncobj *syncobj = drm_syncobj_fdget(fd); 414 struct drm_syncobj *syncobj;
415 struct file *file;
444 int ret; 416 int ret;
445 417
446 if (!syncobj) 418 file = fget(fd);
419 if (!file)
447 return -EINVAL; 420 return -EINVAL;
448 421
422 if (file->f_op != &drm_syncobj_file_fops) {
423 fput(file);
424 return -EINVAL;
425 }
426
449 /* take a reference to put in the idr */ 427 /* take a reference to put in the idr */
428 syncobj = file->private_data;
450 drm_syncobj_get(syncobj); 429 drm_syncobj_get(syncobj);
451 430
452 idr_preload(GFP_KERNEL); 431 idr_preload(GFP_KERNEL);
@@ -455,12 +434,14 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
455 spin_unlock(&file_private->syncobj_table_lock); 434 spin_unlock(&file_private->syncobj_table_lock);
456 idr_preload_end(); 435 idr_preload_end();
457 436
458 if (ret < 0) { 437 if (ret > 0) {
459 fput(syncobj->file); 438 *handle = ret;
460 return ret; 439 ret = 0;
461 } 440 } else
462 *handle = ret; 441 drm_syncobj_put(syncobj);
463 return 0; 442
443 fput(file);
444 return ret;
464} 445}
465 446
466static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private, 447static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private,
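
Note on the hunks above: the rewrite drops the per-syncobj cached struct file (and the cmpxchg race handling around it) and instead creates a fresh anonymous-inode file for every handle-to-fd export, while fd-to-handle takes its own syncobj reference from file->private_data and always releases the file. The export side follows the usual anon-inode idiom; a generic sketch of that idiom, with hypothetical names and not the drm_syncobj code itself:

#include <linux/anon_inodes.h>
#include <linux/err.h>
#include <linux/file.h>
#include <linux/fs.h>

/* Generic "wrap an object in a new fd" sketch, mirroring the export path. */
static int example_export_fd(const struct file_operations *fops, void *obj)
{
	struct file *file;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);	/* reserve an fd number */
	if (fd < 0)
		return fd;

	file = anon_inode_getfile("example_file", fops, obj, 0);
	if (IS_ERR(file)) {
		put_unused_fd(fd);		/* give the number back on failure */
		return PTR_ERR(file);
	}

	/* The caller is assumed to take its own reference on obj (the patch
	 * calls drm_syncobj_get()) before publishing the file.
	 */
	fd_install(fd, file);			/* the fd now owns the file */
	return fd;
}

Because each export gets its own file, closing the last fd drops only that file's reference, which is the bookkeeping the removed caching scheme had to work around.
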
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ad4050f7ab3b..18de6569d04a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
330 * must wait for all rendering to complete to the object (as unbinding 330 * must wait for all rendering to complete to the object (as unbinding
331 * must anyway), and retire the requests. 331 * must anyway), and retire the requests.
332 */ 332 */
333 ret = i915_gem_object_wait(obj, 333 ret = i915_gem_object_set_to_cpu_domain(obj, false);
334 I915_WAIT_INTERRUPTIBLE |
335 I915_WAIT_LOCKED |
336 I915_WAIT_ALL,
337 MAX_SCHEDULE_TIMEOUT,
338 NULL);
339 if (ret) 334 if (ret)
340 return ret; 335 return ret;
341 336
342 i915_gem_retire_requests(to_i915(obj->base.dev));
343
344 while ((vma = list_first_entry_or_null(&obj->vma_list, 337 while ((vma = list_first_entry_or_null(&obj->vma_list,
345 struct i915_vma, 338 struct i915_vma,
346 obj_link))) { 339 obj_link))) {
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index e8ca67a129d2..ac236b88c99c 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -367,6 +367,7 @@ struct i915_sw_dma_fence_cb {
367 struct dma_fence *dma; 367 struct dma_fence *dma;
368 struct timer_list timer; 368 struct timer_list timer;
369 struct irq_work work; 369 struct irq_work work;
370 struct rcu_head rcu;
370}; 371};
371 372
372static void timer_i915_sw_fence_wake(struct timer_list *t) 373static void timer_i915_sw_fence_wake(struct timer_list *t)
@@ -406,7 +407,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk)
406 del_timer_sync(&cb->timer); 407 del_timer_sync(&cb->timer);
407 dma_fence_put(cb->dma); 408 dma_fence_put(cb->dma);
408 409
409 kfree(cb); 410 kfree_rcu(cb, rcu);
410} 411}
411 412
412int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, 413int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
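
Note on the hunk above: adding an rcu_head to the callback struct and switching kfree() to kfree_rcu() is the standard way to free an object that RCU readers may still be dereferencing; the free is deferred past a grace period. A generic sketch of the pattern, with hypothetical type and field names:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_cb {
	int data;
	struct rcu_head rcu;	/* storage kfree_rcu() uses to queue the free */
};

static void example_release(struct example_cb *cb)
{
	/* Readers found this object under rcu_read_lock(); defer the actual
	 * kfree() until after a grace period so none of them can still be
	 * touching it. Equivalent to call_rcu() with a kfree callback.
	 */
	kfree_rcu(cb, rcu);
}
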
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 5f8b9f1f40f1..bcbc7abe6693 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -186,7 +186,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
186 struct intel_wait *wait, *n, *first; 186 struct intel_wait *wait, *n, *first;
187 187
188 if (!b->irq_armed) 188 if (!b->irq_armed)
189 return; 189 goto wakeup_signaler;
190 190
191 /* We only disarm the irq when we are idle (all requests completed), 191 /* We only disarm the irq when we are idle (all requests completed),
192 * so if the bottom-half remains asleep, it missed the request 192 * so if the bottom-half remains asleep, it missed the request
@@ -208,6 +208,14 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
208 b->waiters = RB_ROOT; 208 b->waiters = RB_ROOT;
209 209
210 spin_unlock_irq(&b->rb_lock); 210 spin_unlock_irq(&b->rb_lock);
211
212 /*
213 * The signaling thread may be asleep holding a reference to a request,
214 * that had its signaling cancelled prior to being preempted. We need
215 * to kick the signaler, just in case, to release any such reference.
216 */
217wakeup_signaler:
218 wake_up_process(b->signaler);
211} 219}
212 220
213static bool use_fake_irq(const struct intel_breadcrumbs *b) 221static bool use_fake_irq(const struct intel_breadcrumbs *b)
@@ -651,23 +659,15 @@ static int intel_breadcrumbs_signaler(void *arg)
651 } 659 }
652 660
653 if (unlikely(do_schedule)) { 661 if (unlikely(do_schedule)) {
654 DEFINE_WAIT(exec);
655
656 if (kthread_should_park()) 662 if (kthread_should_park())
657 kthread_parkme(); 663 kthread_parkme();
658 664
659 if (kthread_should_stop()) { 665 if (unlikely(kthread_should_stop())) {
660 GEM_BUG_ON(request); 666 i915_gem_request_put(request);
661 break; 667 break;
662 } 668 }
663 669
664 if (request)
665 add_wait_queue(&request->execute, &exec);
666
667 schedule(); 670 schedule();
668
669 if (request)
670 remove_wait_queue(&request->execute, &exec);
671 } 671 }
672 i915_gem_request_put(request); 672 i915_gem_request_put(request);
673 } while (1); 673 } while (1);
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index e0843bb99169..58a3755544b2 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -2128,6 +2128,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
2128 if (WARN_ON(!pll)) 2128 if (WARN_ON(!pll))
2129 return; 2129 return;
2130 2130
2131 mutex_lock(&dev_priv->dpll_lock);
2132
2131 if (IS_CANNONLAKE(dev_priv)) { 2133 if (IS_CANNONLAKE(dev_priv)) {
2132 /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ 2134 /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */
2133 val = I915_READ(DPCLKA_CFGCR0); 2135 val = I915_READ(DPCLKA_CFGCR0);
@@ -2157,6 +2159,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
2157 } else if (INTEL_INFO(dev_priv)->gen < 9) { 2159 } else if (INTEL_INFO(dev_priv)->gen < 9) {
2158 I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll)); 2160 I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll));
2159 } 2161 }
2162
2163 mutex_unlock(&dev_priv->dpll_lock);
2160} 2164}
2161 2165
2162static void intel_ddi_clk_disable(struct intel_encoder *encoder) 2166static void intel_ddi_clk_disable(struct intel_encoder *encoder)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e8ccf89cb17b..ff9397030092 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9944,11 +9944,10 @@ found:
9944 } 9944 }
9945 9945
9946 ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0); 9946 ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0);
9947 drm_framebuffer_put(fb);
9947 if (ret) 9948 if (ret)
9948 goto fail; 9949 goto fail;
9949 9950
9950 drm_framebuffer_put(fb);
9951
9952 ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); 9951 ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode);
9953 if (ret) 9952 if (ret)
9954 goto fail; 9953 goto fail;
diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c
index 3bf65288ffff..5809b29044fc 100644
--- a/drivers/gpu/drm/i915/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/intel_lpe_audio.c
@@ -193,7 +193,7 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv)
193 }; 193 };
194 194
195 if (!pci_dev_present(atom_hdaudio_ids)) { 195 if (!pci_dev_present(atom_hdaudio_ids)) {
196 DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n"); 196 DRM_INFO("HDaudio controller not detected, using LPE audio instead\n");
197 lpe_present = true; 197 lpe_present = true;
198 } 198 }
199 } 199 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 2615912430cc..435ff8662cfa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -224,7 +224,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
224 /* Determine if we can get a cache-coherent map, forcing 224 /* Determine if we can get a cache-coherent map, forcing
225 * uncached mapping if we can't. 225 * uncached mapping if we can't.
226 */ 226 */
227 if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) 227 if (!nouveau_drm_use_coherent_gpu_mapping(drm))
228 nvbo->force_coherent = true; 228 nvbo->force_coherent = true;
229 } 229 }
230 230
@@ -262,7 +262,8 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
262 if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE && 262 if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
263 (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram) 263 (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram)
264 continue; 264 continue;
265 if ((flags & TTM_PL_FLAG_TT ) && !vmm->page[i].host) 265 if ((flags & TTM_PL_FLAG_TT) &&
266 (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
266 continue; 267 continue;
267 268
268 /* Select this page size if it's the first that supports 269 /* Select this page size if it's the first that supports
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 3331e82ae9e7..96f6bd8aee5d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -157,8 +157,8 @@ struct nouveau_drm {
157 struct nvif_object copy; 157 struct nvif_object copy;
158 int mtrr; 158 int mtrr;
159 int type_vram; 159 int type_vram;
160 int type_host; 160 int type_host[2];
161 int type_ncoh; 161 int type_ncoh[2];
162 } ttm; 162 } ttm;
163 163
164 /* GEM interface support */ 164 /* GEM interface support */
@@ -217,6 +217,13 @@ nouveau_drm(struct drm_device *dev)
217 return dev->dev_private; 217 return dev->dev_private;
218} 218}
219 219
220static inline bool
221nouveau_drm_use_coherent_gpu_mapping(struct nouveau_drm *drm)
222{
223 struct nvif_mmu *mmu = &drm->client.mmu;
224 return !(mmu->type[drm->ttm.type_host[0]].type & NVIF_MEM_UNCACHED);
225}
226
220int nouveau_pmops_suspend(struct device *); 227int nouveau_pmops_suspend(struct device *);
221int nouveau_pmops_resume(struct device *); 228int nouveau_pmops_resume(struct device *);
222bool nouveau_pmops_runtime(void); 229bool nouveau_pmops_runtime(void);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index c533d8e04afc..be7357bf2246 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -429,7 +429,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
429 drm_fb_helper_unregister_fbi(&fbcon->helper); 429 drm_fb_helper_unregister_fbi(&fbcon->helper);
430 drm_fb_helper_fini(&fbcon->helper); 430 drm_fb_helper_fini(&fbcon->helper);
431 431
432 if (nouveau_fb->nvbo) { 432 if (nouveau_fb && nouveau_fb->nvbo) {
433 nouveau_vma_del(&nouveau_fb->vma); 433 nouveau_vma_del(&nouveau_fb->vma);
434 nouveau_bo_unmap(nouveau_fb->nvbo); 434 nouveau_bo_unmap(nouveau_fb->nvbo);
435 nouveau_bo_unpin(nouveau_fb->nvbo); 435 nouveau_bo_unpin(nouveau_fb->nvbo);
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 589a9621db76..c002f8968507 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -103,10 +103,10 @@ nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt)
103 u8 type; 103 u8 type;
104 int ret; 104 int ret;
105 105
106 if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) 106 if (!nouveau_drm_use_coherent_gpu_mapping(drm))
107 type = drm->ttm.type_ncoh; 107 type = drm->ttm.type_ncoh[!!mem->kind];
108 else 108 else
109 type = drm->ttm.type_host; 109 type = drm->ttm.type_host[0];
110 110
111 if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND)) 111 if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND))
112 mem->comp = mem->kind = 0; 112 mem->comp = mem->kind = 0;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 08b974b30482..dff51a0ee028 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -235,27 +235,46 @@ nouveau_ttm_global_release(struct nouveau_drm *drm)
235 drm->ttm.mem_global_ref.release = NULL; 235 drm->ttm.mem_global_ref.release = NULL;
236} 236}
237 237
238int 238static int
239nouveau_ttm_init(struct nouveau_drm *drm) 239nouveau_ttm_init_host(struct nouveau_drm *drm, u8 kind)
240{ 240{
241 struct nvkm_device *device = nvxx_device(&drm->client.device);
242 struct nvkm_pci *pci = device->pci;
243 struct nvif_mmu *mmu = &drm->client.mmu; 241 struct nvif_mmu *mmu = &drm->client.mmu;
244 struct drm_device *dev = drm->dev; 242 int typei;
245 int typei, ret;
246 243
247 typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | 244 typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE |
248 NVIF_MEM_COHERENT); 245 kind | NVIF_MEM_COHERENT);
249 if (typei < 0) 246 if (typei < 0)
250 return -ENOSYS; 247 return -ENOSYS;
251 248
252 drm->ttm.type_host = typei; 249 drm->ttm.type_host[!!kind] = typei;
253 250
254 typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE); 251 typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | kind);
255 if (typei < 0) 252 if (typei < 0)
256 return -ENOSYS; 253 return -ENOSYS;
257 254
258 drm->ttm.type_ncoh = typei; 255 drm->ttm.type_ncoh[!!kind] = typei;
256 return 0;
257}
258
259int
260nouveau_ttm_init(struct nouveau_drm *drm)
261{
262 struct nvkm_device *device = nvxx_device(&drm->client.device);
263 struct nvkm_pci *pci = device->pci;
264 struct nvif_mmu *mmu = &drm->client.mmu;
265 struct drm_device *dev = drm->dev;
266 int typei, ret;
267
268 ret = nouveau_ttm_init_host(drm, 0);
269 if (ret)
270 return ret;
271
272 if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
273 drm->client.device.info.chipset != 0x50) {
274 ret = nouveau_ttm_init_host(drm, NVIF_MEM_KIND);
275 if (ret)
276 return ret;
277 }
259 278
260 if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC && 279 if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC &&
261 drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { 280 drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c
index 9e2628dd8e4d..f5371d96b003 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c
@@ -67,8 +67,8 @@ nouveau_vma_del(struct nouveau_vma **pvma)
67 nvif_vmm_put(&vma->vmm->vmm, &tmp); 67 nvif_vmm_put(&vma->vmm->vmm, &tmp);
68 } 68 }
69 list_del(&vma->head); 69 list_del(&vma->head);
70 *pvma = NULL;
71 kfree(*pvma); 70 kfree(*pvma);
71 *pvma = NULL;
72 } 72 }
73} 73}
74 74
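
Note on the hunk above: the two-line swap fixes an ordering slip. The old code cleared *pvma before calling kfree(*pvma), so kfree() always received NULL and the nouveau_vma was leaked; freeing first and clearing the caller's pointer afterwards is the intended free-and-poison pattern. A minimal generic sketch (not nouveau code):

#include <linux/slab.h>

struct example_obj {
	void *resource;			/* illustrative payload */
};

/* Free-and-clear helper in the corrected order. */
static void example_del(struct example_obj **pobj)
{
	if (*pobj) {
		kfree(*pobj);		/* release the object first ...        */
		*pobj = NULL;		/* ... then poison the caller's handle */
	}
}
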
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index e14643615698..00eeaaffeae5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2369,7 +2369,7 @@ nv13b_chipset = {
2369 .imem = gk20a_instmem_new, 2369 .imem = gk20a_instmem_new,
2370 .ltc = gp100_ltc_new, 2370 .ltc = gp100_ltc_new,
2371 .mc = gp10b_mc_new, 2371 .mc = gp10b_mc_new,
2372 .mmu = gf100_mmu_new, 2372 .mmu = gp10b_mmu_new,
2373 .secboot = gp10b_secboot_new, 2373 .secboot = gp10b_secboot_new,
2374 .pmu = gm20b_pmu_new, 2374 .pmu = gm20b_pmu_new,
2375 .timer = gk20a_timer_new, 2375 .timer = gk20a_timer_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
index 972370ed36f0..7c7efa4ea0d0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
@@ -36,6 +36,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
36 if (data) { 36 if (data) {
37 *ver = nvbios_rd08(bios, data + 0x00); 37 *ver = nvbios_rd08(bios, data + 0x00);
38 switch (*ver) { 38 switch (*ver) {
39 case 0x20:
39 case 0x21: 40 case 0x21:
40 case 0x30: 41 case 0x30:
41 case 0x40: 42 case 0x40:
@@ -63,6 +64,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx,
63 if (data && idx < *cnt) { 64 if (data && idx < *cnt) {
64 u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len); 65 u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len);
65 switch (*ver * !!outp) { 66 switch (*ver * !!outp) {
67 case 0x20:
66 case 0x21: 68 case 0x21:
67 case 0x30: 69 case 0x30:
68 *hdr = nvbios_rd08(bios, data + 0x04); 70 *hdr = nvbios_rd08(bios, data + 0x04);
@@ -96,12 +98,16 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx,
96 info->type = nvbios_rd16(bios, data + 0x00); 98 info->type = nvbios_rd16(bios, data + 0x00);
97 info->mask = nvbios_rd16(bios, data + 0x02); 99 info->mask = nvbios_rd16(bios, data + 0x02);
98 switch (*ver) { 100 switch (*ver) {
101 case 0x20:
102 info->mask |= 0x00c0; /* match any link */
103 /* fall-through */
99 case 0x21: 104 case 0x21:
100 case 0x30: 105 case 0x30:
101 info->flags = nvbios_rd08(bios, data + 0x05); 106 info->flags = nvbios_rd08(bios, data + 0x05);
102 info->script[0] = nvbios_rd16(bios, data + 0x06); 107 info->script[0] = nvbios_rd16(bios, data + 0x06);
103 info->script[1] = nvbios_rd16(bios, data + 0x08); 108 info->script[1] = nvbios_rd16(bios, data + 0x08);
104 info->lnkcmp = nvbios_rd16(bios, data + 0x0a); 109 if (*len >= 0x0c)
110 info->lnkcmp = nvbios_rd16(bios, data + 0x0a);
105 if (*len >= 0x0f) { 111 if (*len >= 0x0f) {
106 info->script[2] = nvbios_rd16(bios, data + 0x0c); 112 info->script[2] = nvbios_rd16(bios, data + 0x0c);
107 info->script[3] = nvbios_rd16(bios, data + 0x0e); 113 info->script[3] = nvbios_rd16(bios, data + 0x0e);
@@ -170,6 +176,7 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
170 memset(info, 0x00, sizeof(*info)); 176 memset(info, 0x00, sizeof(*info));
171 if (data) { 177 if (data) {
172 switch (*ver) { 178 switch (*ver) {
179 case 0x20:
173 case 0x21: 180 case 0x21:
174 info->dc = nvbios_rd08(bios, data + 0x02); 181 info->dc = nvbios_rd08(bios, data + 0x02);
175 info->pe = nvbios_rd08(bios, data + 0x03); 182 info->pe = nvbios_rd08(bios, data + 0x03);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
index 1ba7289684aa..db48a1daca0c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
@@ -249,7 +249,7 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
249 iobj->base.memory.ptrs = &nv50_instobj_fast; 249 iobj->base.memory.ptrs = &nv50_instobj_fast;
250 else 250 else
251 iobj->base.memory.ptrs = &nv50_instobj_slow; 251 iobj->base.memory.ptrs = &nv50_instobj_slow;
252 refcount_inc(&iobj->maps); 252 refcount_set(&iobj->maps, 1);
253 } 253 }
254 254
255 mutex_unlock(&imem->subdev.mutex); 255 mutex_unlock(&imem->subdev.mutex);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
index b1b1f3626b96..deb96de54b00 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
@@ -136,6 +136,13 @@ nvkm_pci_init(struct nvkm_subdev *subdev)
136 return ret; 136 return ret;
137 137
138 pci->irq = pdev->irq; 138 pci->irq = pdev->irq;
139
140 /* Ensure MSI interrupts are armed, for the case where there are
141 * already interrupts pending (for whatever reason) at load time.
142 */
143 if (pci->msi)
144 pci->func->msi_rearm(pci);
145
139 return ret; 146 return ret;
140} 147}
141 148
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
index dda904ec0534..500b6fb3e028 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
@@ -175,11 +175,31 @@ static void sun4i_hdmi_mode_set(struct drm_encoder *encoder,
175 writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG); 175 writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG);
176} 176}
177 177
178static enum drm_mode_status sun4i_hdmi_mode_valid(struct drm_encoder *encoder,
179 const struct drm_display_mode *mode)
180{
181 struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder);
182 unsigned long rate = mode->clock * 1000;
183 unsigned long diff = rate / 200; /* +-0.5% allowed by HDMI spec */
184 long rounded_rate;
185
186 /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */
187 if (rate > 165000000)
188 return MODE_CLOCK_HIGH;
189 rounded_rate = clk_round_rate(hdmi->tmds_clk, rate);
190 if (rounded_rate > 0 &&
191 max_t(unsigned long, rounded_rate, rate) -
192 min_t(unsigned long, rounded_rate, rate) < diff)
193 return MODE_OK;
194 return MODE_NOCLOCK;
195}
196
178static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = { 197static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = {
179 .atomic_check = sun4i_hdmi_atomic_check, 198 .atomic_check = sun4i_hdmi_atomic_check,
180 .disable = sun4i_hdmi_disable, 199 .disable = sun4i_hdmi_disable,
181 .enable = sun4i_hdmi_enable, 200 .enable = sun4i_hdmi_enable,
182 .mode_set = sun4i_hdmi_mode_set, 201 .mode_set = sun4i_hdmi_mode_set,
202 .mode_valid = sun4i_hdmi_mode_valid,
183}; 203};
184 204
185static const struct drm_encoder_funcs sun4i_hdmi_funcs = { 205static const struct drm_encoder_funcs sun4i_hdmi_funcs = {
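
Note on the hunk above: the new mode_valid hook rejects anything above the 165 MHz TMDS limit and otherwise asks the clock framework whether the TMDS clock can land within the HDMI spec's +-0.5% of the requested pixel clock (diff = rate / 200). Worked numbers, all illustrative:

/* Worked numbers for the +-0.5% tolerance window (values illustrative). */
static int example_tmds_rate_ok(void)
{
	unsigned long rate = 148500000UL;	/* requested pixel clock, Hz (1080p60) */
	unsigned long diff = rate / 200;	/* 742500 Hz allowed deviation */
	long rounded_rate = 148499000;		/* assumed clk_round_rate() result */
	unsigned long delta = rounded_rate > (long)rate ?
			      rounded_rate - rate : rate - rounded_rate;

	/* 1000 Hz < 742500 Hz -> MODE_OK; a best effort of 146 MHz
	 * (2.5 MHz off) would exceed the window and return MODE_NOCLOCK.
	 */
	return delta < diff;
}
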
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index e122f5b2a395..f4284b51bdca 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -724,12 +724,12 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
724 if (IS_ERR(tcon->crtc)) { 724 if (IS_ERR(tcon->crtc)) {
725 dev_err(dev, "Couldn't create our CRTC\n"); 725 dev_err(dev, "Couldn't create our CRTC\n");
726 ret = PTR_ERR(tcon->crtc); 726 ret = PTR_ERR(tcon->crtc);
727 goto err_free_clocks; 727 goto err_free_dotclock;
728 } 728 }
729 729
730 ret = sun4i_rgb_init(drm, tcon); 730 ret = sun4i_rgb_init(drm, tcon);
731 if (ret < 0) 731 if (ret < 0)
732 goto err_free_clocks; 732 goto err_free_dotclock;
733 733
734 if (tcon->quirks->needs_de_be_mux) { 734 if (tcon->quirks->needs_de_be_mux) {
735 /* 735 /*
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 44343a2bf55c..b5ba6441489f 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -455,6 +455,7 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
455 freed += (nr_free_pool - shrink_pages) << pool->order; 455 freed += (nr_free_pool - shrink_pages) << pool->order;
456 if (freed >= sc->nr_to_scan) 456 if (freed >= sc->nr_to_scan)
457 break; 457 break;
458 shrink_pages <<= pool->order;
458 } 459 }
459 mutex_unlock(&lock); 460 mutex_unlock(&lock);
460 return freed; 461 return freed;
@@ -543,7 +544,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
543 int r = 0; 544 int r = 0;
544 unsigned i, j, cpages; 545 unsigned i, j, cpages;
545 unsigned npages = 1 << order; 546 unsigned npages = 1 << order;
546 unsigned max_cpages = min(count, (unsigned)NUM_PAGES_TO_ALLOC); 547 unsigned max_cpages = min(count << order, (unsigned)NUM_PAGES_TO_ALLOC);
547 548
548 /* allocate array for page caching change */ 549 /* allocate array for page caching change */
549 caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL); 550 caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL);
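
Note on the hunks above: from the visible context, the shrinker accounts sc->nr_to_scan and 'freed' in pages, while the per-pool free works in compound entries of 2^pool->order pages, so the leftover count has to be shifted back to pages before the next pool is considered (and max_cpages likewise scales with the order, capped at NUM_PAGES_TO_ALLOC). Illustrative arithmetic only:

/* Unit bookkeeping for the added shift (order and counts illustrative). */
static unsigned long example_pages_left(void)
{
	unsigned int order = 9;			/* huge-page pool: 512 pages per entry */
	unsigned long entries_left = 3;		/* leftover reported by the pool free  */

	/* 3 entries << 9 = 1536 pages still to scan, in the same units as
	 * sc->nr_to_scan and 'freed'.
	 */
	return entries_left << order;
}
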
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index c9790e2c3440..af5123042990 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -143,6 +143,7 @@ static int hwmon_thermal_add_sensor(struct device *dev,
143 struct hwmon_device *hwdev, int index) 143 struct hwmon_device *hwdev, int index)
144{ 144{
145 struct hwmon_thermal_data *tdata; 145 struct hwmon_thermal_data *tdata;
146 struct thermal_zone_device *tzd;
146 147
147 tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL); 148 tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL);
148 if (!tdata) 149 if (!tdata)
@@ -151,8 +152,14 @@ static int hwmon_thermal_add_sensor(struct device *dev,
151 tdata->hwdev = hwdev; 152 tdata->hwdev = hwdev;
152 tdata->index = index; 153 tdata->index = index;
153 154
154 devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, 155 tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata,
155 &hwmon_thermal_ops); 156 &hwmon_thermal_ops);
157 /*
158 * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV,
159 * so ignore that error but forward any other error.
160 */
161 if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV))
162 return PTR_ERR(tzd);
156 163
157 return 0; 164 return 0;
158} 165}
@@ -621,14 +628,20 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
621 if (!chip->ops->is_visible(drvdata, hwmon_temp, 628 if (!chip->ops->is_visible(drvdata, hwmon_temp,
622 hwmon_temp_input, j)) 629 hwmon_temp_input, j))
623 continue; 630 continue;
624 if (info[i]->config[j] & HWMON_T_INPUT) 631 if (info[i]->config[j] & HWMON_T_INPUT) {
625 hwmon_thermal_add_sensor(dev, hwdev, j); 632 err = hwmon_thermal_add_sensor(dev,
633 hwdev, j);
634 if (err)
635 goto free_device;
636 }
626 } 637 }
627 } 638 }
628 } 639 }
629 640
630 return hdev; 641 return hdev;
631 642
643free_device:
644 device_unregister(hdev);
632free_hwmon: 645free_hwmon:
633 kfree(hwdev); 646 kfree(hwdev);
634ida_remove: 647ida_remove:
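
Note on the hunks above: the hwmon change starts checking the return value of devm_thermal_zone_of_sensor_register() but deliberately swallows -ENODEV, which the comment says is what the helper returns when CONFIG_THERMAL_OF is disabled, so thermal support stays optional. A generic sketch of that optional-subsystem error filter, with a hypothetical helper standing in for the real registration call:

#include <linux/err.h>
#include <linux/errno.h>

struct device;
struct example_sensor;			/* opaque, illustrative */

/* Hypothetical registration that returns ERR_PTR(-ENODEV) when the
 * optional subsystem is compiled out.
 */
extern struct example_sensor *example_optional_register(struct device *dev);

static int example_add_sensor(struct device *dev)
{
	struct example_sensor *s = example_optional_register(dev);

	if (IS_ERR(s) && PTR_ERR(s) != -ENODEV)
		return PTR_ERR(s);	/* real failure: propagate it */

	return 0;			/* registered, or subsystem absent */
}
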
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
index c9714072e224..59c82cdcf48d 100644
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -377,6 +377,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
377 u8 *ptr; 377 u8 *ptr;
378 u8 *rx_buf; 378 u8 *rx_buf;
379 u8 sum; 379 u8 sum;
380 u8 rx_byte;
380 int ret = 0, final_ret; 381 int ret = 0, final_ret;
381 382
382 len = cros_ec_prepare_tx(ec_dev, ec_msg); 383 len = cros_ec_prepare_tx(ec_dev, ec_msg);
@@ -421,25 +422,22 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
421 if (!ret) { 422 if (!ret) {
422 /* Verify that EC can process command */ 423 /* Verify that EC can process command */
423 for (i = 0; i < len; i++) { 424 for (i = 0; i < len; i++) {
424 switch (rx_buf[i]) { 425 rx_byte = rx_buf[i];
425 case EC_SPI_PAST_END: 426 if (rx_byte == EC_SPI_PAST_END ||
426 case EC_SPI_RX_BAD_DATA: 427 rx_byte == EC_SPI_RX_BAD_DATA ||
427 case EC_SPI_NOT_READY: 428 rx_byte == EC_SPI_NOT_READY) {
428 ret = -EAGAIN; 429 ret = -EREMOTEIO;
429 ec_msg->result = EC_RES_IN_PROGRESS;
430 default:
431 break; 430 break;
432 } 431 }
433 if (ret)
434 break;
435 } 432 }
436 if (!ret)
437 ret = cros_ec_spi_receive_packet(ec_dev,
438 ec_msg->insize + sizeof(*response));
439 } else {
440 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
441 } 433 }
442 434
435 if (!ret)
436 ret = cros_ec_spi_receive_packet(ec_dev,
437 ec_msg->insize + sizeof(*response));
438 else
439 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
440
443 final_ret = terminate_request(ec_dev); 441 final_ret = terminate_request(ec_dev);
444 442
445 spi_bus_unlock(ec_spi->spi->master); 443 spi_bus_unlock(ec_spi->spi->master);
@@ -508,6 +506,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
508 int i, len; 506 int i, len;
509 u8 *ptr; 507 u8 *ptr;
510 u8 *rx_buf; 508 u8 *rx_buf;
509 u8 rx_byte;
511 int sum; 510 int sum;
512 int ret = 0, final_ret; 511 int ret = 0, final_ret;
513 512
@@ -544,25 +543,22 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
544 if (!ret) { 543 if (!ret) {
545 /* Verify that EC can process command */ 544 /* Verify that EC can process command */
546 for (i = 0; i < len; i++) { 545 for (i = 0; i < len; i++) {
547 switch (rx_buf[i]) { 546 rx_byte = rx_buf[i];
548 case EC_SPI_PAST_END: 547 if (rx_byte == EC_SPI_PAST_END ||
549 case EC_SPI_RX_BAD_DATA: 548 rx_byte == EC_SPI_RX_BAD_DATA ||
550 case EC_SPI_NOT_READY: 549 rx_byte == EC_SPI_NOT_READY) {
551 ret = -EAGAIN; 550 ret = -EREMOTEIO;
552 ec_msg->result = EC_RES_IN_PROGRESS;
553 default:
554 break; 551 break;
555 } 552 }
556 if (ret)
557 break;
558 } 553 }
559 if (!ret)
560 ret = cros_ec_spi_receive_response(ec_dev,
561 ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
562 } else {
563 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
564 } 554 }
565 555
556 if (!ret)
557 ret = cros_ec_spi_receive_response(ec_dev,
558 ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
559 else
560 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
561
566 final_ret = terminate_request(ec_dev); 562 final_ret = terminate_request(ec_dev);
567 563
568 spi_bus_unlock(ec_spi->spi->master); 564 spi_bus_unlock(ec_spi->spi->master);
@@ -667,6 +663,7 @@ static int cros_ec_spi_probe(struct spi_device *spi)
667 sizeof(struct ec_response_get_protocol_info); 663 sizeof(struct ec_response_get_protocol_info);
668 ec_dev->dout_size = sizeof(struct ec_host_request); 664 ec_dev->dout_size = sizeof(struct ec_host_request);
669 665
666 ec_spi->last_transfer_ns = ktime_get_ns();
670 667
671 err = cros_ec_register(ec_dev); 668 err = cros_ec_register(ec_dev);
672 if (err) { 669 if (err) {
diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c
index da16bf45fab4..dc94ffc6321a 100644
--- a/drivers/mfd/twl4030-audio.c
+++ b/drivers/mfd/twl4030-audio.c
@@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void)
159EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk); 159EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
160 160
161static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata, 161static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
162 struct device_node *node) 162 struct device_node *parent)
163{ 163{
164 struct device_node *node;
165
164 if (pdata && pdata->codec) 166 if (pdata && pdata->codec)
165 return true; 167 return true;
166 168
167 if (of_find_node_by_name(node, "codec")) 169 node = of_get_child_by_name(parent, "codec");
170 if (node) {
171 of_node_put(node);
168 return true; 172 return true;
173 }
169 174
170 return false; 175 return false;
171} 176}
diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c
index d66502d36ba0..dd19f17a1b63 100644
--- a/drivers/mfd/twl6040.c
+++ b/drivers/mfd/twl6040.c
@@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = {
97}; 97};
98 98
99 99
100static bool twl6040_has_vibra(struct device_node *node) 100static bool twl6040_has_vibra(struct device_node *parent)
101{ 101{
102#ifdef CONFIG_OF 102 struct device_node *node;
103 if (of_find_node_by_name(node, "vibra")) 103
104 node = of_get_child_by_name(parent, "vibra");
105 if (node) {
106 of_node_put(node);
104 return true; 107 return true;
105#endif 108 }
109
106 return false; 110 return false;
107} 111}
108 112
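
Note on the two MFD hunks above: both replace of_find_node_by_name() with of_get_child_by_name(). The distinction matters: of_find_node_by_name() walks the whole tree starting after the given node (and drops a reference on that starting node), so it can match a "codec" or "vibra" node belonging to some other device, and the old calls also leaked the node they found. of_get_child_by_name() only inspects direct children and returns the child with a reference the caller must drop. A sketch of the corrected lookup, with illustrative names:

#include <linux/of.h>

/* Check whether "parent" has a direct child called "codec". */
static bool example_has_codec_child(struct device_node *parent)
{
	struct device_node *child;

	child = of_get_child_by_name(parent, "codec");	/* ref taken on match */
	if (!child)
		return false;

	of_node_put(child);	/* we only needed to know it exists */
	return true;
}
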
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index f80e911b8843..73b605577447 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1114,7 +1114,7 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs,
1114 if (!ops->oobbuf) 1114 if (!ops->oobbuf)
1115 ops->ooblen = 0; 1115 ops->ooblen = 0;
1116 1116
1117 if (offs < 0 || offs + ops->len >= mtd->size) 1117 if (offs < 0 || offs + ops->len > mtd->size)
1118 return -EINVAL; 1118 return -EINVAL;
1119 1119
1120 if (ops->ooblen) { 1120 if (ops->ooblen) {
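
Note on the hunk above: the mtdcore check is relaxed from >= to >, so an operation is valid as long as it ends at or before the device size. Boundary arithmetic with assumed sizes:

#include <linux/errno.h>

/* Boundary arithmetic for the relaxed check (sizes illustrative). */
static int example_range_ok(void)
{
	unsigned long long size = 0x100000;	/* 1 MiB device */
	unsigned long long offs = 0xff000;	/* start of the last 4 KiB */
	unsigned long long len  = 0x1000;

	/* offs + len == size: the access ends exactly at the device boundary.
	 * The old test (offs + len >= size) rejected it with -EINVAL; the new
	 * test (offs + len > size) accepts it, while offs = 0xff800 with the
	 * same length overruns by 2 KiB and is still rejected.
	 */
	return offs + len > size ? -EINVAL : 0;
}
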
diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c
index e0eb51d8c012..dd56a671ea42 100644
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c
@@ -1763,7 +1763,7 @@ try_dmaread:
1763 err = brcmstb_nand_verify_erased_page(mtd, chip, buf, 1763 err = brcmstb_nand_verify_erased_page(mtd, chip, buf,
1764 addr); 1764 addr);
1765 /* erased page bitflips corrected */ 1765 /* erased page bitflips corrected */
1766 if (err > 0) 1766 if (err >= 0)
1767 return err; 1767 return err;
1768 } 1768 }
1769 1769
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
index 484f7fbc3f7d..a8bde6665c24 100644
--- a/drivers/mtd/nand/gpio.c
+++ b/drivers/mtd/nand/gpio.c
@@ -253,9 +253,9 @@ static int gpio_nand_probe(struct platform_device *pdev)
253 goto out_ce; 253 goto out_ce;
254 } 254 }
255 255
256 gpiomtd->nwp = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW); 256 gpiomtd->ale = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
257 if (IS_ERR(gpiomtd->nwp)) { 257 if (IS_ERR(gpiomtd->ale)) {
258 ret = PTR_ERR(gpiomtd->nwp); 258 ret = PTR_ERR(gpiomtd->ale);
259 goto out_ce; 259 goto out_ce;
260 } 260 }
261 261
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 50f8d4a1b983..d4d824ef64e9 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -1067,9 +1067,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
1067 return ret; 1067 return ret;
1068 } 1068 }
1069 1069
1070 /* handle the block mark swapping */
1071 block_mark_swapping(this, payload_virt, auxiliary_virt);
1072
1073 /* Loop over status bytes, accumulating ECC status. */ 1070 /* Loop over status bytes, accumulating ECC status. */
1074 status = auxiliary_virt + nfc_geo->auxiliary_status_offset; 1071 status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
1075 1072
@@ -1158,6 +1155,9 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
1158 max_bitflips = max_t(unsigned int, max_bitflips, *status); 1155 max_bitflips = max_t(unsigned int, max_bitflips, *status);
1159 } 1156 }
1160 1157
1158 /* handle the block mark swapping */
1159 block_mark_swapping(this, buf, auxiliary_virt);
1160
1161 if (oob_required) { 1161 if (oob_required) {
1162 /* 1162 /*
1163 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob() 1163 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob()
diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h
index 3c63b16d485f..d9efbc8d783b 100644
--- a/drivers/net/ethernet/arc/emac.h
+++ b/drivers/net/ethernet/arc/emac.h
@@ -159,6 +159,8 @@ struct arc_emac_priv {
159 unsigned int link; 159 unsigned int link;
160 unsigned int duplex; 160 unsigned int duplex;
161 unsigned int speed; 161 unsigned int speed;
162
163 unsigned int rx_missed_errors;
162}; 164};
163 165
164/** 166/**
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index 3241af1ce718..bd277b0dc615 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -26,6 +26,8 @@
26 26
27#include "emac.h" 27#include "emac.h"
28 28
29static void arc_emac_restart(struct net_device *ndev);
30
29/** 31/**
30 * arc_emac_tx_avail - Return the number of available slots in the tx ring. 32 * arc_emac_tx_avail - Return the number of available slots in the tx ring.
31 * @priv: Pointer to ARC EMAC private data structure. 33 * @priv: Pointer to ARC EMAC private data structure.
@@ -210,39 +212,48 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
210 continue; 212 continue;
211 } 213 }
212 214
213 pktlen = info & LEN_MASK; 215 /* Prepare the BD for the next cycle. Call netif_receive_skb()
214 stats->rx_packets++; 216 * only if a new skb was allocated and mapped, to avoid holes
215 stats->rx_bytes += pktlen; 217 * in the RX FIFO.
216 skb = rx_buff->skb; 218 */
217 skb_put(skb, pktlen); 219 skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
218 skb->dev = ndev; 220 if (unlikely(!skb)) {
219 skb->protocol = eth_type_trans(skb, ndev); 221 if (net_ratelimit())
220 222 netdev_err(ndev, "cannot allocate skb\n");
221 dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr), 223 /* Return ownership to EMAC */
222 dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE); 224 rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
223
224 /* Prepare the BD for next cycle */
225 rx_buff->skb = netdev_alloc_skb_ip_align(ndev,
226 EMAC_BUFFER_SIZE);
227 if (unlikely(!rx_buff->skb)) {
228 stats->rx_errors++; 225 stats->rx_errors++;
229 /* Because receive_skb is below, increment rx_dropped */
230 stats->rx_dropped++; 226 stats->rx_dropped++;
231 continue; 227 continue;
232 } 228 }
233 229
234 /* receive_skb only if new skb was allocated to avoid holes */ 230 addr = dma_map_single(&ndev->dev, (void *)skb->data,
235 netif_receive_skb(skb);
236
237 addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data,
238 EMAC_BUFFER_SIZE, DMA_FROM_DEVICE); 231 EMAC_BUFFER_SIZE, DMA_FROM_DEVICE);
239 if (dma_mapping_error(&ndev->dev, addr)) { 232 if (dma_mapping_error(&ndev->dev, addr)) {
240 if (net_ratelimit()) 233 if (net_ratelimit())
241 netdev_err(ndev, "cannot dma map\n"); 234 netdev_err(ndev, "cannot map dma buffer\n");
242 dev_kfree_skb(rx_buff->skb); 235 dev_kfree_skb(skb);
236 /* Return ownership to EMAC */
237 rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
243 stats->rx_errors++; 238 stats->rx_errors++;
239 stats->rx_dropped++;
244 continue; 240 continue;
245 } 241 }
242
 243 /* unmap previously mapped skb */
244 dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
245 dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
246
247 pktlen = info & LEN_MASK;
248 stats->rx_packets++;
249 stats->rx_bytes += pktlen;
250 skb_put(rx_buff->skb, pktlen);
251 rx_buff->skb->dev = ndev;
252 rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev);
253
254 netif_receive_skb(rx_buff->skb);
255
256 rx_buff->skb = skb;
246 dma_unmap_addr_set(rx_buff, addr, addr); 257 dma_unmap_addr_set(rx_buff, addr, addr);
247 dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE); 258 dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE);
248 259
@@ -259,6 +270,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
259} 270}
260 271
261/** 272/**
273 * arc_emac_rx_miss_handle - handle R_MISS register
274 * @ndev: Pointer to the net_device structure.
275 */
276static void arc_emac_rx_miss_handle(struct net_device *ndev)
277{
278 struct arc_emac_priv *priv = netdev_priv(ndev);
279 struct net_device_stats *stats = &ndev->stats;
280 unsigned int miss;
281
282 miss = arc_reg_get(priv, R_MISS);
283 if (miss) {
284 stats->rx_errors += miss;
285 stats->rx_missed_errors += miss;
286 priv->rx_missed_errors += miss;
287 }
288}
289
290/**
291 * arc_emac_rx_stall_check - check RX stall
292 * @ndev: Pointer to the net_device structure.
293 * @budget: How many BDs requested to process on 1 call.
 294 * @work_done: How many BDs processed.
 295 *
 296 * Under certain conditions the EMAC stops reception of incoming packets and
 297 * continuously increments the R_MISS register instead of saving data into
 298 * the provided buffer. This function detects that condition and restarts
 299 * the EMAC.
300 */
301static void arc_emac_rx_stall_check(struct net_device *ndev,
302 int budget, unsigned int work_done)
303{
304 struct arc_emac_priv *priv = netdev_priv(ndev);
305 struct arc_emac_bd *rxbd;
306
307 if (work_done)
308 priv->rx_missed_errors = 0;
309
310 if (priv->rx_missed_errors && budget) {
311 rxbd = &priv->rxbd[priv->last_rx_bd];
312 if (le32_to_cpu(rxbd->info) & FOR_EMAC) {
313 arc_emac_restart(ndev);
314 priv->rx_missed_errors = 0;
315 }
316 }
317}
318
319/**
262 * arc_emac_poll - NAPI poll handler. 320 * arc_emac_poll - NAPI poll handler.
263 * @napi: Pointer to napi_struct structure. 321 * @napi: Pointer to napi_struct structure.
264 * @budget: How many BDs to process on 1 call. 322 * @budget: How many BDs to process on 1 call.
@@ -272,6 +330,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
272 unsigned int work_done; 330 unsigned int work_done;
273 331
274 arc_emac_tx_clean(ndev); 332 arc_emac_tx_clean(ndev);
333 arc_emac_rx_miss_handle(ndev);
275 334
276 work_done = arc_emac_rx(ndev, budget); 335 work_done = arc_emac_rx(ndev, budget);
277 if (work_done < budget) { 336 if (work_done < budget) {
@@ -279,6 +338,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
279 arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK); 338 arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK);
280 } 339 }
281 340
341 arc_emac_rx_stall_check(ndev, budget, work_done);
342
282 return work_done; 343 return work_done;
283} 344}
284 345
@@ -320,6 +381,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance)
320 if (status & MSER_MASK) { 381 if (status & MSER_MASK) {
321 stats->rx_missed_errors += 0x100; 382 stats->rx_missed_errors += 0x100;
322 stats->rx_errors += 0x100; 383 stats->rx_errors += 0x100;
384 priv->rx_missed_errors += 0x100;
385 napi_schedule(&priv->napi);
323 } 386 }
324 387
325 if (status & RXCR_MASK) { 388 if (status & RXCR_MASK) {
@@ -732,6 +795,63 @@ static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
732} 795}
733 796
734 797
798/**
799 * arc_emac_restart - Restart EMAC
800 * @ndev: Pointer to net_device structure.
801 *
 802 * This function does a hardware reset of the EMAC in order to restore
 803 * network packet reception.
804 */
805static void arc_emac_restart(struct net_device *ndev)
806{
807 struct arc_emac_priv *priv = netdev_priv(ndev);
808 struct net_device_stats *stats = &ndev->stats;
809 int i;
810
811 if (net_ratelimit())
812 netdev_warn(ndev, "restarting stalled EMAC\n");
813
814 netif_stop_queue(ndev);
815
816 /* Disable interrupts */
817 arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
818
819 /* Disable EMAC */
820 arc_reg_clr(priv, R_CTRL, EN_MASK);
821
822 /* Return the sk_buff to system */
823 arc_free_tx_queue(ndev);
824
825 /* Clean Tx BD's */
826 priv->txbd_curr = 0;
827 priv->txbd_dirty = 0;
828 memset(priv->txbd, 0, TX_RING_SZ);
829
830 for (i = 0; i < RX_BD_NUM; i++) {
831 struct arc_emac_bd *rxbd = &priv->rxbd[i];
832 unsigned int info = le32_to_cpu(rxbd->info);
833
834 if (!(info & FOR_EMAC)) {
835 stats->rx_errors++;
836 stats->rx_dropped++;
837 }
838 /* Return ownership to EMAC */
839 rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
840 }
841 priv->last_rx_bd = 0;
842
843 /* Make sure info is visible to EMAC before enable */
844 wmb();
845
846 /* Enable interrupts */
847 arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
848
849 /* Enable EMAC */
850 arc_reg_or(priv, R_CTRL, EN_MASK);
851
852 netif_start_queue(ndev);
853}
854
735static const struct net_device_ops arc_emac_netdev_ops = { 855static const struct net_device_ops arc_emac_netdev_ops = {
736 .ndo_open = arc_emac_open, 856 .ndo_open = arc_emac_open,
737 .ndo_stop = arc_emac_stop, 857 .ndo_stop = arc_emac_stop,
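The reworked arc_emac_rx() loop earlier in this file's diff changes the per-descriptor order of operations: the replacement buffer is allocated and DMA-mapped first, and only then is the just-received skb unmapped and handed to the stack; if allocation or mapping fails, the descriptor is given straight back to the EMAC and the frame is counted as dropped, so the RX ring never ends up with a hole. A condensed sketch of that ordering, assuming the driver's EMAC_BUFFER_SIZE and a simplified stand-in for its per-slot bookkeeping (the struct and function below are illustrative, not driver code):

struct rx_slot {			/* stand-in for the driver's buffer state */
	struct sk_buff *skb;
	dma_addr_t addr;
};

static int refill_then_deliver(struct net_device *ndev, struct rx_slot *slot,
			       unsigned int pktlen)
{
	struct sk_buff *new_skb;
	dma_addr_t new_addr;

	/* 1. Secure the replacement buffer before touching the old one. */
	new_skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
	if (unlikely(!new_skb))
		return -ENOMEM;		/* caller returns the BD to the EMAC */

	new_addr = dma_map_single(&ndev->dev, new_skb->data,
				  EMAC_BUFFER_SIZE, DMA_FROM_DEVICE);
	if (dma_mapping_error(&ndev->dev, new_addr)) {
		dev_kfree_skb(new_skb);
		return -ENOMEM;
	}

	/* 2. Only now unmap and deliver the buffer holding the frame. */
	dma_unmap_single(&ndev->dev, slot->addr, EMAC_BUFFER_SIZE,
			 DMA_FROM_DEVICE);
	skb_put(slot->skb, pktlen);
	slot->skb->protocol = eth_type_trans(slot->skb, ndev);
	netif_receive_skb(slot->skb);

	/* 3. Park the new buffer in the slot for the next frame. */
	slot->skb = new_skb;
	slot->addr = new_addr;

	return 0;
}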
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index de51c2177d03..d09c5a9c53b5 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -14225,7 +14225,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
14225 /* Reset PHY, otherwise the read DMA engine will be in a mode that 14225 /* Reset PHY, otherwise the read DMA engine will be in a mode that
14226 * breaks all requests to 256 bytes. 14226 * breaks all requests to 256 bytes.
14227 */ 14227 */
14228 if (tg3_asic_rev(tp) == ASIC_REV_57766) 14228 if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
14229 tg3_asic_rev(tp) == ASIC_REV_5717 ||
14230 tg3_asic_rev(tp) == ASIC_REV_5719)
14229 reset_phy = true; 14231 reset_phy = true;
14230 14232
14231 err = tg3_restart_hw(tp, reset_phy); 14233 err = tg3_restart_hw(tp, reset_phy);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index bc93b69cfd1e..a539263cd79c 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct mvneta_port *pp)
1214 val &= ~MVNETA_GMAC0_PORT_ENABLE; 1214 val &= ~MVNETA_GMAC0_PORT_ENABLE;
1215 mvreg_write(pp, MVNETA_GMAC_CTRL_0, val); 1215 mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
1216 1216
1217 pp->link = 0;
1218 pp->duplex = -1;
1219 pp->speed = 0;
1220
1217 udelay(200); 1221 udelay(200);
1218} 1222}
1219 1223
@@ -1958,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
1958 1962
1959 if (!mvneta_rxq_desc_is_first_last(rx_status) || 1963 if (!mvneta_rxq_desc_is_first_last(rx_status) ||
1960 (rx_status & MVNETA_RXD_ERR_SUMMARY)) { 1964 (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
1965 mvneta_rx_error(pp, rx_desc);
1961err_drop_frame: 1966err_drop_frame:
1962 dev->stats.rx_errors++; 1967 dev->stats.rx_errors++;
1963 mvneta_rx_error(pp, rx_desc);
1964 /* leave the descriptor untouched */ 1968 /* leave the descriptor untouched */
1965 continue; 1969 continue;
1966 } 1970 }
@@ -3011,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp)
3011{ 3015{
3012 int queue; 3016 int queue;
3013 3017
3014 for (queue = 0; queue < txq_number; queue++) 3018 for (queue = 0; queue < rxq_number; queue++)
3015 mvneta_rxq_deinit(pp, &pp->rxqs[queue]); 3019 mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
3016} 3020}
3017 3021
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 54adfd967858..fc67e35b253e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1961,11 +1961,12 @@ static int mtk_hw_init(struct mtk_eth *eth)
1961 /* set GE2 TUNE */ 1961 /* set GE2 TUNE */
1962 regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0); 1962 regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);
1963 1963
1964 /* GE1, Force 1000M/FD, FC ON */ 1964 /* Set link down as the default for each GMAC. Its own MCR will be set
1965 mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0)); 1965 * up with a more appropriate value when mtk_phy_link_adjust() is
1966 1966 * invoked.
1967 /* GE2, Force 1000M/FD, FC ON */ 1967 */
1968 mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1)); 1968 for (i = 0; i < MTK_MAC_COUNT; i++)
1969 mtk_w32(eth, 0, MTK_MAC_MCR(i));
1969 1970
1970 /* Indicates CDM to parse the MTK special tag from CPU 1971 /* Indicates CDM to parse the MTK special tag from CPU
1971 * which also is working out for untag packets. 1972 * which also is working out for untag packets.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 1fffdebbc9e8..e9a1fbcc4adf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
362 case MLX5_CMD_OP_QUERY_VPORT_COUNTER: 362 case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
363 case MLX5_CMD_OP_ALLOC_Q_COUNTER: 363 case MLX5_CMD_OP_ALLOC_Q_COUNTER:
364 case MLX5_CMD_OP_QUERY_Q_COUNTER: 364 case MLX5_CMD_OP_QUERY_Q_COUNTER:
365 case MLX5_CMD_OP_SET_RATE_LIMIT: 365 case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
366 case MLX5_CMD_OP_QUERY_RATE_LIMIT: 366 case MLX5_CMD_OP_QUERY_RATE_LIMIT:
367 case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: 367 case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
368 case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: 368 case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
@@ -505,7 +505,7 @@ const char *mlx5_command_str(int command)
505 MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); 505 MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
506 MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); 506 MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
507 MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); 507 MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
508 MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); 508 MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
509 MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); 509 MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
510 MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); 510 MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
511 MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); 511 MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c0872b3284cb..543060c305a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -82,6 +82,9 @@
82 max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req) 82 max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
83#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6) 83#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
84#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8) 84#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
85#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
86 (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
87 MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
85 88
86#define MLX5_MPWRQ_LOG_WQE_SZ 18 89#define MLX5_MPWRQ_LOG_WQE_SZ 18
87#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ 90#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
@@ -590,6 +593,7 @@ struct mlx5e_channel {
590 struct mlx5_core_dev *mdev; 593 struct mlx5_core_dev *mdev;
591 struct hwtstamp_config *tstamp; 594 struct hwtstamp_config *tstamp;
592 int ix; 595 int ix;
596 int cpu;
593}; 597};
594 598
595struct mlx5e_channels { 599struct mlx5e_channels {
@@ -935,8 +939,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
935 u8 cq_period_mode); 939 u8 cq_period_mode);
936void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, 940void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
937 u8 cq_period_mode); 941 u8 cq_period_mode);
938void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, 942void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
939 struct mlx5e_params *params, u8 rq_type); 943 struct mlx5e_params *params,
944 u8 rq_type);
940 945
941static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) 946static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
942{ 947{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index c6d90b6dd80e..9bcf38f4123b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -274,6 +274,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
274static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, 274static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
275 struct ieee_ets *ets) 275 struct ieee_ets *ets)
276{ 276{
277 bool have_ets_tc = false;
277 int bw_sum = 0; 278 int bw_sum = 0;
278 int i; 279 int i;
279 280
@@ -288,11 +289,14 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
288 } 289 }
289 290
290 /* Validate Bandwidth Sum */ 291 /* Validate Bandwidth Sum */
291 for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) 292 for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
292 if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) 293 if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
294 have_ets_tc = true;
293 bw_sum += ets->tc_tx_bw[i]; 295 bw_sum += ets->tc_tx_bw[i];
296 }
297 }
294 298
295 if (bw_sum != 0 && bw_sum != 100) { 299 if (have_ets_tc && bw_sum != 100) {
296 netdev_err(netdev, 300 netdev_err(netdev,
297 "Failed to validate ETS: BW sum is illegal\n"); 301 "Failed to validate ETS: BW sum is illegal\n");
298 return -EINVAL; 302 return -EINVAL;
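The have_ets_tc flag tightens what the bandwidth-sum check means: the sum of tc_tx_bw[] must be exactly 100 whenever at least one traffic class is configured as ETS, instead of also accepting any configuration whose weights happen to sum to 0. A small illustrative case that the old "bw_sum != 0 && bw_sum != 100" test accepted and the new code rejects (values are made up; the fields come from struct ieee_ets in <uapi/linux/dcbnl.h>):

	struct ieee_ets ets = { 0 };

	/* Two classes nominally in ETS mode, but no bandwidth assigned. */
	ets.tc_tsa[0] = IEEE_8021QAZ_TSA_ETS;
	ets.tc_tsa[1] = IEEE_8021QAZ_TSA_ETS;
	ets.tc_tx_bw[0] = 0;
	ets.tc_tx_bw[1] = 0;

	/* Before: bw_sum == 0, so the check passed and the bogus ETS setup
	 * was programmed.  Now: have_ets_tc is true and bw_sum != 100, so
	 * mlx5e_dbcnl_validate_ets() returns -EINVAL.
	 */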
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 23425f028405..8f05efa5c829 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1523,8 +1523,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
1523 new_channels.params = priv->channels.params; 1523 new_channels.params = priv->channels.params;
1524 MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); 1524 MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
1525 1525
1526 mlx5e_set_rq_type_params(priv->mdev, &new_channels.params, 1526 new_channels.params.mpwqe_log_stride_sz =
1527 new_channels.params.rq_wq_type); 1527 MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val);
1528 new_channels.params.mpwqe_log_num_strides =
1529 MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz;
1528 1530
1529 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { 1531 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
1530 priv->channels.params = new_channels.params; 1532 priv->channels.params = new_channels.params;
@@ -1536,6 +1538,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
1536 return err; 1538 return err;
1537 1539
1538 mlx5e_switch_priv_channels(priv, &new_channels, NULL); 1540 mlx5e_switch_priv_channels(priv, &new_channels, NULL);
1541 mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
1542 MLX5E_GET_PFLAG(&priv->channels.params,
1543 MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
1544
1539 return 0; 1545 return 0;
1540} 1546}
1541 1547
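With MLX5E_MPWQE_STRIDE_SZ() the CQE-compression toggle above recomputes both MPWQE parameters from the same relation used at init time: log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - log_stride_sz. As a rough worked example, assuming the device minimum (MLX5_MPWRQ_MIN_LOG_STRIDE_SZ) does not exceed the requested values of 6 and 8 used in the en.h macros earlier in this diff:

	log_stride_sz   = MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_compress);
	                  /* 6 with compression off, 8 with it on */
	log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - log_stride_sz;
	                  /* 18 - 6 = 12, or 18 - 8 = 10 */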
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d2b057a3e512..d9d8227f195f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -71,11 +71,6 @@ struct mlx5e_channel_param {
71 struct mlx5e_cq_param icosq_cq; 71 struct mlx5e_cq_param icosq_cq;
72}; 72};
73 73
74static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
75{
76 return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
77}
78
79static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) 74static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
80{ 75{
81 return MLX5_CAP_GEN(mdev, striding_rq) && 76 return MLX5_CAP_GEN(mdev, striding_rq) &&
@@ -83,8 +78,8 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
83 MLX5_CAP_ETH(mdev, reg_umr_sq); 78 MLX5_CAP_ETH(mdev, reg_umr_sq);
84} 79}
85 80
86void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, 81void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
87 struct mlx5e_params *params, u8 rq_type) 82 struct mlx5e_params *params, u8 rq_type)
88{ 83{
89 params->rq_wq_type = rq_type; 84 params->rq_wq_type = rq_type;
90 params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; 85 params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@ -93,10 +88,8 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
93 params->log_rq_size = is_kdump_kernel() ? 88 params->log_rq_size = is_kdump_kernel() ?
94 MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW : 89 MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
95 MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; 90 MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
96 params->mpwqe_log_stride_sz = 91 params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
97 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ? 92 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
98 MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) :
99 MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
100 params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - 93 params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
101 params->mpwqe_log_stride_sz; 94 params->mpwqe_log_stride_sz;
102 break; 95 break;
@@ -120,13 +113,14 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
120 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); 113 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
121} 114}
122 115
123static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) 116static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
117 struct mlx5e_params *params)
124{ 118{
125 u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && 119 u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
126 !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? 120 !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
127 MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : 121 MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
128 MLX5_WQ_TYPE_LINKED_LIST; 122 MLX5_WQ_TYPE_LINKED_LIST;
129 mlx5e_set_rq_type_params(mdev, params, rq_type); 123 mlx5e_init_rq_type_params(mdev, params, rq_type);
130} 124}
131 125
132static void mlx5e_update_carrier(struct mlx5e_priv *priv) 126static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@ -444,17 +438,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
444 int wq_sz = mlx5_wq_ll_get_size(&rq->wq); 438 int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
445 int mtt_sz = mlx5e_get_wqe_mtt_sz(); 439 int mtt_sz = mlx5e_get_wqe_mtt_sz();
446 int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; 440 int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
447 int node = mlx5e_get_node(c->priv, c->ix);
448 int i; 441 int i;
449 442
450 rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), 443 rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
451 GFP_KERNEL, node); 444 GFP_KERNEL, cpu_to_node(c->cpu));
452 if (!rq->mpwqe.info) 445 if (!rq->mpwqe.info)
453 goto err_out; 446 goto err_out;
454 447
455 /* We allocate more than mtt_sz as we will align the pointer */ 448 /* We allocate more than mtt_sz as we will align the pointer */
456 rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, 449 rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
457 GFP_KERNEL, node); 450 cpu_to_node(c->cpu));
458 if (unlikely(!rq->mpwqe.mtt_no_align)) 451 if (unlikely(!rq->mpwqe.mtt_no_align))
459 goto err_free_wqe_info; 452 goto err_free_wqe_info;
460 453
@@ -562,7 +555,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
562 int err; 555 int err;
563 int i; 556 int i;
564 557
565 rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 558 rqp->wq.db_numa_node = cpu_to_node(c->cpu);
566 559
567 err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq, 560 err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
568 &rq->wq_ctrl); 561 &rq->wq_ctrl);
@@ -629,8 +622,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
629 default: /* MLX5_WQ_TYPE_LINKED_LIST */ 622 default: /* MLX5_WQ_TYPE_LINKED_LIST */
630 rq->wqe.frag_info = 623 rq->wqe.frag_info =
631 kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info), 624 kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
632 GFP_KERNEL, 625 GFP_KERNEL, cpu_to_node(c->cpu));
633 mlx5e_get_node(c->priv, c->ix));
634 if (!rq->wqe.frag_info) { 626 if (!rq->wqe.frag_info) {
635 err = -ENOMEM; 627 err = -ENOMEM;
636 goto err_rq_wq_destroy; 628 goto err_rq_wq_destroy;
@@ -1000,13 +992,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
1000 sq->uar_map = mdev->mlx5e_res.bfreg.map; 992 sq->uar_map = mdev->mlx5e_res.bfreg.map;
1001 sq->min_inline_mode = params->tx_min_inline_mode; 993 sq->min_inline_mode = params->tx_min_inline_mode;
1002 994
1003 param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 995 param->wq.db_numa_node = cpu_to_node(c->cpu);
1004 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); 996 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
1005 if (err) 997 if (err)
1006 return err; 998 return err;
1007 sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; 999 sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
1008 1000
1009 err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix)); 1001 err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
1010 if (err) 1002 if (err)
1011 goto err_sq_wq_destroy; 1003 goto err_sq_wq_destroy;
1012 1004
@@ -1053,13 +1045,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
1053 sq->channel = c; 1045 sq->channel = c;
1054 sq->uar_map = mdev->mlx5e_res.bfreg.map; 1046 sq->uar_map = mdev->mlx5e_res.bfreg.map;
1055 1047
1056 param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 1048 param->wq.db_numa_node = cpu_to_node(c->cpu);
1057 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); 1049 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
1058 if (err) 1050 if (err)
1059 return err; 1051 return err;
1060 sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; 1052 sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
1061 1053
1062 err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix)); 1054 err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
1063 if (err) 1055 if (err)
1064 goto err_sq_wq_destroy; 1056 goto err_sq_wq_destroy;
1065 1057
@@ -1126,13 +1118,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
1126 if (MLX5_IPSEC_DEV(c->priv->mdev)) 1118 if (MLX5_IPSEC_DEV(c->priv->mdev))
1127 set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); 1119 set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
1128 1120
1129 param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 1121 param->wq.db_numa_node = cpu_to_node(c->cpu);
1130 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); 1122 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
1131 if (err) 1123 if (err)
1132 return err; 1124 return err;
1133 sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; 1125 sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
1134 1126
1135 err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix)); 1127 err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
1136 if (err) 1128 if (err)
1137 goto err_sq_wq_destroy; 1129 goto err_sq_wq_destroy;
1138 1130
@@ -1504,8 +1496,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
1504 struct mlx5_core_dev *mdev = c->priv->mdev; 1496 struct mlx5_core_dev *mdev = c->priv->mdev;
1505 int err; 1497 int err;
1506 1498
1507 param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix); 1499 param->wq.buf_numa_node = cpu_to_node(c->cpu);
1508 param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 1500 param->wq.db_numa_node = cpu_to_node(c->cpu);
1509 param->eq_ix = c->ix; 1501 param->eq_ix = c->ix;
1510 1502
1511 err = mlx5e_alloc_cq_common(mdev, param, cq); 1503 err = mlx5e_alloc_cq_common(mdev, param, cq);
@@ -1604,6 +1596,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
1604 mlx5e_free_cq(cq); 1596 mlx5e_free_cq(cq);
1605} 1597}
1606 1598
1599static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
1600{
1601 return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
1602}
1603
1607static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, 1604static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1608 struct mlx5e_params *params, 1605 struct mlx5e_params *params,
1609 struct mlx5e_channel_param *cparam) 1606 struct mlx5e_channel_param *cparam)
@@ -1752,12 +1749,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1752{ 1749{
1753 struct mlx5e_cq_moder icocq_moder = {0, 0}; 1750 struct mlx5e_cq_moder icocq_moder = {0, 0};
1754 struct net_device *netdev = priv->netdev; 1751 struct net_device *netdev = priv->netdev;
1752 int cpu = mlx5e_get_cpu(priv, ix);
1755 struct mlx5e_channel *c; 1753 struct mlx5e_channel *c;
1756 unsigned int irq; 1754 unsigned int irq;
1757 int err; 1755 int err;
1758 int eqn; 1756 int eqn;
1759 1757
1760 c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix)); 1758 c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
1761 if (!c) 1759 if (!c)
1762 return -ENOMEM; 1760 return -ENOMEM;
1763 1761
@@ -1765,6 +1763,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1765 c->mdev = priv->mdev; 1763 c->mdev = priv->mdev;
1766 c->tstamp = &priv->tstamp; 1764 c->tstamp = &priv->tstamp;
1767 c->ix = ix; 1765 c->ix = ix;
1766 c->cpu = cpu;
1768 c->pdev = &priv->mdev->pdev->dev; 1767 c->pdev = &priv->mdev->pdev->dev;
1769 c->netdev = priv->netdev; 1768 c->netdev = priv->netdev;
1770 c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); 1769 c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
@@ -1853,8 +1852,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
1853 for (tc = 0; tc < c->num_tc; tc++) 1852 for (tc = 0; tc < c->num_tc; tc++)
1854 mlx5e_activate_txqsq(&c->sq[tc]); 1853 mlx5e_activate_txqsq(&c->sq[tc]);
1855 mlx5e_activate_rq(&c->rq); 1854 mlx5e_activate_rq(&c->rq);
1856 netif_set_xps_queue(c->netdev, 1855 netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
1857 mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
1858} 1856}
1859 1857
1860static void mlx5e_deactivate_channel(struct mlx5e_channel *c) 1858static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@ -3679,6 +3677,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
3679 struct sk_buff *skb, 3677 struct sk_buff *skb,
3680 netdev_features_t features) 3678 netdev_features_t features)
3681{ 3679{
3680 unsigned int offset = 0;
3682 struct udphdr *udph; 3681 struct udphdr *udph;
3683 u8 proto; 3682 u8 proto;
3684 u16 port; 3683 u16 port;
@@ -3688,7 +3687,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
3688 proto = ip_hdr(skb)->protocol; 3687 proto = ip_hdr(skb)->protocol;
3689 break; 3688 break;
3690 case htons(ETH_P_IPV6): 3689 case htons(ETH_P_IPV6):
3691 proto = ipv6_hdr(skb)->nexthdr; 3690 proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
3692 break; 3691 break;
3693 default: 3692 default:
3694 goto out; 3693 goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 60771865c99c..e7e7cef2bde4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -466,7 +466,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
466 break; 466 break;
467 case MLX5_EVENT_TYPE_CQ_ERROR: 467 case MLX5_EVENT_TYPE_CQ_ERROR:
468 cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; 468 cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
469 mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n", 469 mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
470 cqn, eqe->data.cq_err.syndrome); 470 cqn, eqe->data.cq_err.syndrome);
471 mlx5_cq_event(dev, cqn, eqe->type); 471 mlx5_cq_event(dev, cqn, eqe->type);
472 break; 472 break;
@@ -775,7 +775,7 @@ err1:
775 return err; 775 return err;
776} 776}
777 777
778int mlx5_stop_eqs(struct mlx5_core_dev *dev) 778void mlx5_stop_eqs(struct mlx5_core_dev *dev)
779{ 779{
780 struct mlx5_eq_table *table = &dev->priv.eq_table; 780 struct mlx5_eq_table *table = &dev->priv.eq_table;
781 int err; 781 int err;
@@ -784,22 +784,26 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
784 if (MLX5_CAP_GEN(dev, pg)) { 784 if (MLX5_CAP_GEN(dev, pg)) {
785 err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq); 785 err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
786 if (err) 786 if (err)
787 return err; 787 mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
788 err);
788 } 789 }
789#endif 790#endif
790 791
791 err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); 792 err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
792 if (err) 793 if (err)
793 return err; 794 mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
795 err);
794 796
795 mlx5_destroy_unmap_eq(dev, &table->async_eq); 797 err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
798 if (err)
799 mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
800 err);
796 mlx5_cmd_use_polling(dev); 801 mlx5_cmd_use_polling(dev);
797 802
798 err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); 803 err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
799 if (err) 804 if (err)
800 mlx5_cmd_use_events(dev); 805 mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
801 806 err);
802 return err;
803} 807}
804 808
805int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, 809int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
index 3c11d6e2160a..14962969c5ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
@@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
66 u8 actual_size; 66 u8 actual_size;
67 int err; 67 int err;
68 68
69 if (!size)
70 return -EINVAL;
71
69 if (!fdev->mdev) 72 if (!fdev->mdev)
70 return -ENOTCONN; 73 return -ENOTCONN;
71 74
@@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
95 u8 actual_size; 98 u8 actual_size;
96 int err; 99 int err;
97 100
101 if (!size)
102 return -EINVAL;
103
98 if (!fdev->mdev) 104 if (!fdev->mdev)
99 return -ENOTCONN; 105 return -ENOTCONN;
100 106
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index c70fd663a633..dfaad9ecb2b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -174,6 +174,8 @@ static void del_hw_fte(struct fs_node *node);
174static void del_sw_flow_table(struct fs_node *node); 174static void del_sw_flow_table(struct fs_node *node);
175static void del_sw_flow_group(struct fs_node *node); 175static void del_sw_flow_group(struct fs_node *node);
176static void del_sw_fte(struct fs_node *node); 176static void del_sw_fte(struct fs_node *node);
177static void del_sw_prio(struct fs_node *node);
178static void del_sw_ns(struct fs_node *node);
177/* Delete rule (destination) is special case that 179/* Delete rule (destination) is special case that
178 * requires to lock the FTE for all the deletion process. 180 * requires to lock the FTE for all the deletion process.
179 */ 181 */
@@ -408,6 +410,16 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
408 return NULL; 410 return NULL;
409} 411}
410 412
413static void del_sw_ns(struct fs_node *node)
414{
415 kfree(node);
416}
417
418static void del_sw_prio(struct fs_node *node)
419{
420 kfree(node);
421}
422
411static void del_hw_flow_table(struct fs_node *node) 423static void del_hw_flow_table(struct fs_node *node)
412{ 424{
413 struct mlx5_flow_table *ft; 425 struct mlx5_flow_table *ft;
@@ -2064,7 +2076,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
2064 return ERR_PTR(-ENOMEM); 2076 return ERR_PTR(-ENOMEM);
2065 2077
2066 fs_prio->node.type = FS_TYPE_PRIO; 2078 fs_prio->node.type = FS_TYPE_PRIO;
2067 tree_init_node(&fs_prio->node, NULL, NULL); 2079 tree_init_node(&fs_prio->node, NULL, del_sw_prio);
2068 tree_add_node(&fs_prio->node, &ns->node); 2080 tree_add_node(&fs_prio->node, &ns->node);
2069 fs_prio->num_levels = num_levels; 2081 fs_prio->num_levels = num_levels;
2070 fs_prio->prio = prio; 2082 fs_prio->prio = prio;
@@ -2090,7 +2102,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
2090 return ERR_PTR(-ENOMEM); 2102 return ERR_PTR(-ENOMEM);
2091 2103
2092 fs_init_namespace(ns); 2104 fs_init_namespace(ns);
2093 tree_init_node(&ns->node, NULL, NULL); 2105 tree_init_node(&ns->node, NULL, del_sw_ns);
2094 tree_add_node(&ns->node, &prio->node); 2106 tree_add_node(&ns->node, &prio->node);
2095 list_add_tail(&ns->node.list, &prio->node.children); 2107 list_add_tail(&ns->node.list, &prio->node.children);
2096 2108
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 1a0e797ad001..21d29f7936f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -241,7 +241,7 @@ static void print_health_info(struct mlx5_core_dev *dev)
241 u32 fw; 241 u32 fw;
242 int i; 242 int i;
243 243
244 /* If the syndrom is 0, the device is OK and no need to print buffer */ 244 /* If the syndrome is 0, the device is OK and no need to print buffer */
245 if (!ioread8(&h->synd)) 245 if (!ioread8(&h->synd))
246 return; 246 return;
247 247
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index d2a66dc4adc6..8812d7208e8f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -57,7 +57,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
57 struct mlx5e_params *params) 57 struct mlx5e_params *params)
58{ 58{
59 /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ 59 /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
60 mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST); 60 mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
61 61
62 /* RQ size in ipoib by default is 512 */ 62 /* RQ size in ipoib by default is 512 */
63 params->log_rq_size = is_kdump_kernel() ? 63 params->log_rq_size = is_kdump_kernel() ?
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 5f323442cc5a..8a89c7e8cd63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
317{ 317{
318 struct mlx5_priv *priv = &dev->priv; 318 struct mlx5_priv *priv = &dev->priv;
319 struct mlx5_eq_table *table = &priv->eq_table; 319 struct mlx5_eq_table *table = &priv->eq_table;
320 struct irq_affinity irqdesc = {
321 .pre_vectors = MLX5_EQ_VEC_COMP_BASE,
322 };
323 int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); 320 int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
324 int nvec; 321 int nvec;
325 322
@@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
333 if (!priv->irq_info) 330 if (!priv->irq_info)
334 goto err_free_msix; 331 goto err_free_msix;
335 332
336 nvec = pci_alloc_irq_vectors_affinity(dev->pdev, 333 nvec = pci_alloc_irq_vectors(dev->pdev,
337 MLX5_EQ_VEC_COMP_BASE + 1, nvec, 334 MLX5_EQ_VEC_COMP_BASE + 1, nvec,
338 PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, 335 PCI_IRQ_MSIX);
339 &irqdesc);
340 if (nvec < 0) 336 if (nvec < 0)
341 return nvec; 337 return nvec;
342 338
@@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
622 return (u64)timer_l | (u64)timer_h1 << 32; 618 return (u64)timer_l | (u64)timer_h1 << 32;
623} 619}
624 620
621static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
622{
623 struct mlx5_priv *priv = &mdev->priv;
624 int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
625
626 if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
627 mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
628 return -ENOMEM;
629 }
630
631 cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
632 priv->irq_info[i].mask);
633
634 if (IS_ENABLED(CONFIG_SMP) &&
635 irq_set_affinity_hint(irq, priv->irq_info[i].mask))
636 mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
637
638 return 0;
639}
640
641static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
642{
643 struct mlx5_priv *priv = &mdev->priv;
644 int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
645
646 irq_set_affinity_hint(irq, NULL);
647 free_cpumask_var(priv->irq_info[i].mask);
648}
649
650static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
651{
652 int err;
653 int i;
654
655 for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
656 err = mlx5_irq_set_affinity_hint(mdev, i);
657 if (err)
658 goto err_out;
659 }
660
661 return 0;
662
663err_out:
664 for (i--; i >= 0; i--)
665 mlx5_irq_clear_affinity_hint(mdev, i);
666
667 return err;
668}
669
670static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
671{
672 int i;
673
674 for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
675 mlx5_irq_clear_affinity_hint(mdev, i);
676}
677
625int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, 678int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
626 unsigned int *irqn) 679 unsigned int *irqn)
627{ 680{
@@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
1097 goto err_stop_eqs; 1150 goto err_stop_eqs;
1098 } 1151 }
1099 1152
1153 err = mlx5_irq_set_affinity_hints(dev);
1154 if (err) {
1155 dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
1156 goto err_affinity_hints;
1157 }
1158
1100 err = mlx5_init_fs(dev); 1159 err = mlx5_init_fs(dev);
1101 if (err) { 1160 if (err) {
1102 dev_err(&pdev->dev, "Failed to init flow steering\n"); 1161 dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1154,6 +1213,9 @@ err_sriov:
1154 mlx5_cleanup_fs(dev); 1213 mlx5_cleanup_fs(dev);
1155 1214
1156err_fs: 1215err_fs:
1216 mlx5_irq_clear_affinity_hints(dev);
1217
1218err_affinity_hints:
1157 free_comp_eqs(dev); 1219 free_comp_eqs(dev);
1158 1220
1159err_stop_eqs: 1221err_stop_eqs:
@@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
1222 1284
1223 mlx5_sriov_detach(dev); 1285 mlx5_sriov_detach(dev);
1224 mlx5_cleanup_fs(dev); 1286 mlx5_cleanup_fs(dev);
1287 mlx5_irq_clear_affinity_hints(dev);
1225 free_comp_eqs(dev); 1288 free_comp_eqs(dev);
1226 mlx5_stop_eqs(dev); 1289 mlx5_stop_eqs(dev);
1227 mlx5_put_uars_page(dev, priv->uar); 1290 mlx5_put_uars_page(dev, priv->uar);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index db9e665ab104..889130edb715 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -213,8 +213,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
213err_cmd: 213err_cmd:
214 memset(din, 0, sizeof(din)); 214 memset(din, 0, sizeof(din));
215 memset(dout, 0, sizeof(dout)); 215 memset(dout, 0, sizeof(dout));
216 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); 216 MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
217 MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); 217 MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
218 mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); 218 mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
219 return err; 219 return err;
220} 220}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index e651e4c02867..d3c33e9eea72 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -125,16 +125,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
125 return ret_entry; 125 return ret_entry;
126} 126}
127 127
128static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev, 128static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
129 u32 rate, u16 index) 129 u32 rate, u16 index)
130{ 130{
131 u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0}; 131 u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0};
132 u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0}; 132 u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
133 133
134 MLX5_SET(set_rate_limit_in, in, opcode, 134 MLX5_SET(set_pp_rate_limit_in, in, opcode,
135 MLX5_CMD_OP_SET_RATE_LIMIT); 135 MLX5_CMD_OP_SET_PP_RATE_LIMIT);
136 MLX5_SET(set_rate_limit_in, in, rate_limit_index, index); 136 MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
137 MLX5_SET(set_rate_limit_in, in, rate_limit, rate); 137 MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate);
138 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 138 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
139} 139}
140 140
@@ -173,7 +173,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
173 entry->refcount++; 173 entry->refcount++;
174 } else { 174 } else {
175 /* new rate limit */ 175 /* new rate limit */
176 err = mlx5_set_rate_limit_cmd(dev, rate, entry->index); 176 err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index);
177 if (err) { 177 if (err) {
178 mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n", 178 mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
179 rate, err); 179 rate, err);
@@ -209,7 +209,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate)
209 entry->refcount--; 209 entry->refcount--;
210 if (!entry->refcount) { 210 if (!entry->refcount) {
211 /* need to remove rate */ 211 /* need to remove rate */
212 mlx5_set_rate_limit_cmd(dev, 0, entry->index); 212 mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index);
213 entry->rate = 0; 213 entry->rate = 0;
214 } 214 }
215 215
@@ -262,8 +262,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
262 /* Clear all configured rates */ 262 /* Clear all configured rates */
263 for (i = 0; i < table->max_size; i++) 263 for (i = 0; i < table->max_size; i++)
264 if (table->rl_entry[i].rate) 264 if (table->rl_entry[i].rate)
265 mlx5_set_rate_limit_cmd(dev, 0, 265 mlx5_set_pp_rate_limit_cmd(dev, 0,
266 table->rl_entry[i].index); 266 table->rl_entry[i].index);
267 267
268 kfree(dev->priv.rl_table.rl_entry); 268 kfree(dev->priv.rl_table.rl_entry);
269} 269}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index 07a9ba6cfc70..2f74953e4561 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port)
71 struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; 71 struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
72 struct mlx5e_vxlan *vxlan; 72 struct mlx5e_vxlan *vxlan;
73 73
74 spin_lock(&vxlan_db->lock); 74 spin_lock_bh(&vxlan_db->lock);
75 vxlan = radix_tree_lookup(&vxlan_db->tree, port); 75 vxlan = radix_tree_lookup(&vxlan_db->tree, port);
76 spin_unlock(&vxlan_db->lock); 76 spin_unlock_bh(&vxlan_db->lock);
77 77
78 return vxlan; 78 return vxlan;
79} 79}
@@ -88,8 +88,12 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
88 struct mlx5e_vxlan *vxlan; 88 struct mlx5e_vxlan *vxlan;
89 int err; 89 int err;
90 90
91 if (mlx5e_vxlan_lookup_port(priv, port)) 91 mutex_lock(&priv->state_lock);
92 vxlan = mlx5e_vxlan_lookup_port(priv, port);
93 if (vxlan) {
94 atomic_inc(&vxlan->refcount);
92 goto free_work; 95 goto free_work;
96 }
93 97
94 if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port)) 98 if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
95 goto free_work; 99 goto free_work;
@@ -99,10 +103,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
99 goto err_delete_port; 103 goto err_delete_port;
100 104
101 vxlan->udp_port = port; 105 vxlan->udp_port = port;
106 atomic_set(&vxlan->refcount, 1);
102 107
103 spin_lock_irq(&vxlan_db->lock); 108 spin_lock_bh(&vxlan_db->lock);
104 err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan); 109 err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
105 spin_unlock_irq(&vxlan_db->lock); 110 spin_unlock_bh(&vxlan_db->lock);
106 if (err) 111 if (err)
107 goto err_free; 112 goto err_free;
108 113
@@ -113,35 +118,39 @@ err_free:
113err_delete_port: 118err_delete_port:
114 mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); 119 mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
115free_work: 120free_work:
121 mutex_unlock(&priv->state_lock);
116 kfree(vxlan_work); 122 kfree(vxlan_work);
117} 123}
118 124
119static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port) 125static void mlx5e_vxlan_del_port(struct work_struct *work)
120{ 126{
127 struct mlx5e_vxlan_work *vxlan_work =
128 container_of(work, struct mlx5e_vxlan_work, work);
129 struct mlx5e_priv *priv = vxlan_work->priv;
121 struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; 130 struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
131 u16 port = vxlan_work->port;
122 struct mlx5e_vxlan *vxlan; 132 struct mlx5e_vxlan *vxlan;
133 bool remove = false;
123 134
124 spin_lock_irq(&vxlan_db->lock); 135 mutex_lock(&priv->state_lock);
125 vxlan = radix_tree_delete(&vxlan_db->tree, port); 136 spin_lock_bh(&vxlan_db->lock);
126 spin_unlock_irq(&vxlan_db->lock); 137 vxlan = radix_tree_lookup(&vxlan_db->tree, port);
127
128 if (!vxlan) 138 if (!vxlan)
129 return; 139 goto out_unlock;
130
131 mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port);
132
133 kfree(vxlan);
134}
135 140
136static void mlx5e_vxlan_del_port(struct work_struct *work) 141 if (atomic_dec_and_test(&vxlan->refcount)) {
137{ 142 radix_tree_delete(&vxlan_db->tree, port);
138 struct mlx5e_vxlan_work *vxlan_work = 143 remove = true;
139 container_of(work, struct mlx5e_vxlan_work, work); 144 }
140 struct mlx5e_priv *priv = vxlan_work->priv;
141 u16 port = vxlan_work->port;
142 145
143 __mlx5e_vxlan_core_del_port(priv, port); 146out_unlock:
147 spin_unlock_bh(&vxlan_db->lock);
144 148
149 if (remove) {
150 mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
151 kfree(vxlan);
152 }
153 mutex_unlock(&priv->state_lock);
145 kfree(vxlan_work); 154 kfree(vxlan_work);
146} 155}
147 156
@@ -171,12 +180,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv)
171 struct mlx5e_vxlan *vxlan; 180 struct mlx5e_vxlan *vxlan;
172 unsigned int port = 0; 181 unsigned int port = 0;
173 182
174 spin_lock_irq(&vxlan_db->lock); 183 /* Lockless since we are the only radix-tree consumers, wq is disabled */
175 while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) { 184 while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
176 port = vxlan->udp_port; 185 port = vxlan->udp_port;
177 spin_unlock_irq(&vxlan_db->lock); 186 radix_tree_delete(&vxlan_db->tree, port);
178 __mlx5e_vxlan_core_del_port(priv, (u16)port); 187 mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
179 spin_lock_irq(&vxlan_db->lock); 188 kfree(vxlan);
180 } 189 }
181 spin_unlock_irq(&vxlan_db->lock);
182} 190}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
index 5def12c048e3..5ef6ae7d568a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
@@ -36,6 +36,7 @@
36#include "en.h" 36#include "en.h"
37 37
38struct mlx5e_vxlan { 38struct mlx5e_vxlan {
39 atomic_t refcount;
39 u16 udp_port; 40 u16 udp_port;
40}; 41};
41 42
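The refcount added to struct mlx5e_vxlan gives the add/del work items get/put semantics: adding a UDP port that is already offloaded only bumps the count, and the firmware delete plus kfree() happen only when the last user is gone, with the radix tree touched under the BH-safe spinlock and the whole add/del pair serialized by priv->state_lock. A condensed view of the two paths (illustrative; it mirrors the vxlan.c hunks above rather than adding new driver logic):

	/* add: port already known -> just take another reference */
	vxlan = mlx5e_vxlan_lookup_port(priv, port);
	if (vxlan) {
		atomic_inc(&vxlan->refcount);
		goto out;
	}
	/* ... program the port in firmware, insert it into the tree ... */
	atomic_set(&vxlan->refcount, 1);

	/* del: only the last put removes the entry and the HW state */
	spin_lock_bh(&vxlan_db->lock);
	vxlan = radix_tree_lookup(&vxlan_db->tree, port);
	if (vxlan && atomic_dec_and_test(&vxlan->refcount)) {
		radix_tree_delete(&vxlan_db->tree, port);
		remove = true;
	}
	spin_unlock_bh(&vxlan_db->lock);

	if (remove) {
		mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
		kfree(vxlan);
	}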
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 72ef4f8025f0..be657b8533f0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2436,25 +2436,16 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2436 rhashtable_destroy(&mlxsw_sp->router->neigh_ht); 2436 rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2437} 2437}
2438 2438
2439static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
2440 const struct mlxsw_sp_rif *rif)
2441{
2442 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2443
2444 mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
2445 rif->rif_index, rif->addr);
2446 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2447}
2448
2449static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, 2439static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2450 struct mlxsw_sp_rif *rif) 2440 struct mlxsw_sp_rif *rif)
2451{ 2441{
2452 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; 2442 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2453 2443
2454 mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
2455 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, 2444 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2456 rif_list_node) 2445 rif_list_node) {
2446 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2457 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); 2447 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2448 }
2458} 2449}
2459 2450
2460enum mlxsw_sp_nexthop_type { 2451enum mlxsw_sp_nexthop_type {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index e379b78e86ef..13190aa09faf 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -82,10 +82,33 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn)
82 return nfp_net_ebpf_capable(nn) ? "BPF" : ""; 82 return nfp_net_ebpf_capable(nn) ? "BPF" : "";
83} 83}
84 84
85static int
86nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
87{
88 int err;
89
90 nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL);
91 if (!nn->app_priv)
92 return -ENOMEM;
93
94 err = nfp_app_nic_vnic_alloc(app, nn, id);
95 if (err)
96 goto err_free_priv;
97
98 return 0;
99err_free_priv:
100 kfree(nn->app_priv);
101 return err;
102}
103
85static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn) 104static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
86{ 105{
106 struct nfp_bpf_vnic *bv = nn->app_priv;
107
87 if (nn->dp.bpf_offload_xdp) 108 if (nn->dp.bpf_offload_xdp)
88 nfp_bpf_xdp_offload(app, nn, NULL); 109 nfp_bpf_xdp_offload(app, nn, NULL);
110 WARN_ON(bv->tc_prog);
111 kfree(bv);
89} 112}
90 113
91static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, 114static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
@@ -93,6 +116,9 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
93{ 116{
94 struct tc_cls_bpf_offload *cls_bpf = type_data; 117 struct tc_cls_bpf_offload *cls_bpf = type_data;
95 struct nfp_net *nn = cb_priv; 118 struct nfp_net *nn = cb_priv;
119 struct bpf_prog *oldprog;
120 struct nfp_bpf_vnic *bv;
121 int err;
96 122
97 if (type != TC_SETUP_CLSBPF || 123 if (type != TC_SETUP_CLSBPF ||
98 !tc_can_offload(nn->dp.netdev) || 124 !tc_can_offload(nn->dp.netdev) ||
@@ -100,8 +126,6 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
100 cls_bpf->common.protocol != htons(ETH_P_ALL) || 126 cls_bpf->common.protocol != htons(ETH_P_ALL) ||
101 cls_bpf->common.chain_index) 127 cls_bpf->common.chain_index)
102 return -EOPNOTSUPP; 128 return -EOPNOTSUPP;
103 if (nn->dp.bpf_offload_xdp)
104 return -EBUSY;
105 129
106 /* Only support TC direct action */ 130 /* Only support TC direct action */
107 if (!cls_bpf->exts_integrated || 131 if (!cls_bpf->exts_integrated ||
@@ -110,16 +134,25 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
110 return -EOPNOTSUPP; 134 return -EOPNOTSUPP;
111 } 135 }
112 136
113 switch (cls_bpf->command) { 137 if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
114 case TC_CLSBPF_REPLACE:
115 return nfp_net_bpf_offload(nn, cls_bpf->prog, true);
116 case TC_CLSBPF_ADD:
117 return nfp_net_bpf_offload(nn, cls_bpf->prog, false);
118 case TC_CLSBPF_DESTROY:
119 return nfp_net_bpf_offload(nn, NULL, true);
120 default:
121 return -EOPNOTSUPP; 138 return -EOPNOTSUPP;
139
140 bv = nn->app_priv;
141 oldprog = cls_bpf->oldprog;
142
143 /* Don't remove if oldprog doesn't match driver's state */
144 if (bv->tc_prog != oldprog) {
145 oldprog = NULL;
146 if (!cls_bpf->prog)
147 return 0;
122 } 148 }
149
150 err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog);
151 if (err)
152 return err;
153
154 bv->tc_prog = cls_bpf->prog;
155 return 0;
123} 156}
124 157
125static int nfp_bpf_setup_tc_block(struct net_device *netdev, 158static int nfp_bpf_setup_tc_block(struct net_device *netdev,
@@ -167,7 +200,7 @@ const struct nfp_app_type app_bpf = {
167 200
168 .extra_cap = nfp_bpf_extra_cap, 201 .extra_cap = nfp_bpf_extra_cap,
169 202
170 .vnic_alloc = nfp_app_nic_vnic_alloc, 203 .vnic_alloc = nfp_bpf_vnic_alloc,
171 .vnic_free = nfp_bpf_vnic_free, 204 .vnic_free = nfp_bpf_vnic_free,
172 205
173 .setup_tc = nfp_bpf_setup_tc, 206 .setup_tc = nfp_bpf_setup_tc,
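
Editor's note: the hunk above collapses the old TC_CLSBPF_REPLACE/ADD/DESTROY cases into a single TC_CLSBPF_OFFLOAD command, with the driver caching its loaded program in bv->tc_prog and comparing it against cls_bpf->oldprog. A minimal userspace sketch of that bookkeeping is below; it is an illustration only, and fake_offload() is a hypothetical stand-in for nfp_net_bpf_offload().

/*
 * Illustrative sketch (not kernel code) of the oldprog bookkeeping in
 * nfp_bpf_setup_tc_block_cb() above.
 */
#include <stdio.h>
#include <stddef.h>

struct prog { const char *name; };

static struct prog *cached_tc_prog;		/* mirrors bv->tc_prog */

static int fake_offload(struct prog *prog, struct prog *oldprog)
{
	printf("offload: new=%s old=%s\n",
	       prog ? prog->name : "(none)",
	       oldprog ? oldprog->name : "(none)");
	return 0;
}

static int handle_clsbpf(struct prog *prog, struct prog *oldprog)
{
	/* Don't remove if oldprog doesn't match driver's state */
	if (cached_tc_prog != oldprog) {
		oldprog = NULL;
		if (!prog)
			return 0;
	}

	if (fake_offload(prog, oldprog))
		return -1;

	cached_tc_prog = prog;
	return 0;
}

int main(void)
{
	struct prog a = { "progA" }, b = { "progB" };

	handle_clsbpf(&a, NULL);	/* add: nothing cached yet        */
	handle_clsbpf(&b, &a);		/* replace: oldprog matches cache */
	handle_clsbpf(NULL, &b);	/* destroy: clears cached program */
	return 0;
}
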
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 082a15f6dfb5..57b6043177a3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -172,6 +172,14 @@ struct nfp_prog {
172 struct list_head insns; 172 struct list_head insns;
173}; 173};
174 174
175/**
176 * struct nfp_bpf_vnic - per-vNIC BPF priv structure
177 * @tc_prog: currently loaded cls_bpf program
178 */
179struct nfp_bpf_vnic {
180 struct bpf_prog *tc_prog;
181};
182
175int nfp_bpf_jit(struct nfp_prog *prog); 183int nfp_bpf_jit(struct nfp_prog *prog);
176 184
177extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops; 185extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 70c92b649b29..38c924bdd32e 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -253,18 +253,18 @@ static int emac_open(struct net_device *netdev)
253 return ret; 253 return ret;
254 } 254 }
255 255
256 ret = emac_mac_up(adpt); 256 ret = adpt->phy.open(adpt);
257 if (ret) { 257 if (ret) {
258 emac_mac_rx_tx_rings_free_all(adpt); 258 emac_mac_rx_tx_rings_free_all(adpt);
259 free_irq(irq->irq, irq); 259 free_irq(irq->irq, irq);
260 return ret; 260 return ret;
261 } 261 }
262 262
263 ret = adpt->phy.open(adpt); 263 ret = emac_mac_up(adpt);
264 if (ret) { 264 if (ret) {
265 emac_mac_down(adpt);
266 emac_mac_rx_tx_rings_free_all(adpt); 265 emac_mac_rx_tx_rings_free_all(adpt);
267 free_irq(irq->irq, irq); 266 free_irq(irq->irq, irq);
267 adpt->phy.close(adpt);
268 return ret; 268 return ret;
269 } 269 }
270 270
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index e1e5ac053760..ce2ea2d491ac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -409,7 +409,7 @@ struct stmmac_desc_ops {
409 /* get timestamp value */ 409 /* get timestamp value */
410 u64(*get_timestamp) (void *desc, u32 ats); 410 u64(*get_timestamp) (void *desc, u32 ats);
411 /* get rx timestamp status */ 411 /* get rx timestamp status */
412 int (*get_rx_timestamp_status) (void *desc, u32 ats); 412 int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
413 /* Display ring */ 413 /* Display ring */
414 void (*display_ring)(void *head, unsigned int size, bool rx); 414 void (*display_ring)(void *head, unsigned int size, bool rx);
415 /* set MSS via context descriptor */ 415 /* set MSS via context descriptor */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 4b286e27c4ca..7e089bf906b4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -258,7 +258,8 @@ static int dwmac4_rx_check_timestamp(void *desc)
258 return ret; 258 return ret;
259} 259}
260 260
261static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats) 261static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc,
262 u32 ats)
262{ 263{
263 struct dma_desc *p = (struct dma_desc *)desc; 264 struct dma_desc *p = (struct dma_desc *)desc;
264 int ret = -EINVAL; 265 int ret = -EINVAL;
@@ -270,7 +271,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
270 271
271 /* Check if timestamp is OK from context descriptor */ 272 /* Check if timestamp is OK from context descriptor */
272 do { 273 do {
273 ret = dwmac4_rx_check_timestamp(desc); 274 ret = dwmac4_rx_check_timestamp(next_desc);
274 if (ret < 0) 275 if (ret < 0)
275 goto exit; 276 goto exit;
276 i++; 277 i++;
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 7546b3664113..2a828a312814 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -400,7 +400,8 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
400 return ns; 400 return ns;
401} 401}
402 402
403static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats) 403static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
404 u32 ats)
404{ 405{
405 if (ats) { 406 if (ats) {
406 struct dma_extended_desc *p = (struct dma_extended_desc *)desc; 407 struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index f817f8f36569..db4cee57bb24 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -265,7 +265,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
265 return ns; 265 return ns;
266} 266}
267 267
268static int ndesc_get_rx_timestamp_status(void *desc, u32 ats) 268static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
269{ 269{
270 struct dma_desc *p = (struct dma_desc *)desc; 270 struct dma_desc *p = (struct dma_desc *)desc;
271 271
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 721b61655261..08c19ebd5306 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -34,6 +34,7 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
34{ 34{
35 u32 value = readl(ioaddr + PTP_TCR); 35 u32 value = readl(ioaddr + PTP_TCR);
36 unsigned long data; 36 unsigned long data;
37 u32 reg_value;
37 38
38 /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second 39 /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second
39 * formula = (1/ptp_clock) * 1000000000 40 * formula = (1/ptp_clock) * 1000000000
@@ -50,10 +51,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
50 51
51 data &= PTP_SSIR_SSINC_MASK; 52 data &= PTP_SSIR_SSINC_MASK;
52 53
54 reg_value = data;
53 if (gmac4) 55 if (gmac4)
54 data = data << GMAC4_PTP_SSIR_SSINC_SHIFT; 56 reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT;
55 57
56 writel(data, ioaddr + PTP_SSIR); 58 writel(reg_value, ioaddr + PTP_SSIR);
57 59
58 return data; 60 return data;
59} 61}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d7250539d0bd..337d53d12e94 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -482,7 +482,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
482 desc = np; 482 desc = np;
483 483
484 /* Check if timestamp is available */ 484 /* Check if timestamp is available */
485 if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) { 485 if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) {
486 ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); 486 ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
487 netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns); 487 netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
488 shhwtstamp = skb_hwtstamps(skb); 488 shhwtstamp = skb_hwtstamps(skb);
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index b5a8f750e433..82104edca393 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -879,6 +879,8 @@ static int m88e1510_config_init(struct phy_device *phydev)
879 879
880 /* SGMII-to-Copper mode initialization */ 880 /* SGMII-to-Copper mode initialization */
881 if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { 881 if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
882 u32 pause;
883
882 /* Select page 18 */ 884 /* Select page 18 */
883 err = marvell_set_page(phydev, 18); 885 err = marvell_set_page(phydev, 18);
884 if (err < 0) 886 if (err < 0)
@@ -902,6 +904,16 @@ static int m88e1510_config_init(struct phy_device *phydev)
902 err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); 904 err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
903 if (err < 0) 905 if (err < 0)
904 return err; 906 return err;
907
908 /* There appears to be a bug in the 88e1512 when used in
 909 * SGMII to copper mode, where the AN advertisement register
910 * clears the pause bits each time a negotiation occurs.
 911 * This means we can never be truly sure what was advertised,
912 * so disable Pause support.
913 */
914 pause = SUPPORTED_Pause | SUPPORTED_Asym_Pause;
915 phydev->supported &= ~pause;
916 phydev->advertising &= ~pause;
905 } 917 }
906 918
907 return m88e1121_config_init(phydev); 919 return m88e1121_config_init(phydev);
@@ -2073,7 +2085,7 @@ static struct phy_driver marvell_drivers[] = {
2073 .flags = PHY_HAS_INTERRUPT, 2085 .flags = PHY_HAS_INTERRUPT,
2074 .probe = marvell_probe, 2086 .probe = marvell_probe,
2075 .config_init = &m88e1145_config_init, 2087 .config_init = &m88e1145_config_init,
2076 .config_aneg = &marvell_config_aneg, 2088 .config_aneg = &m88e1101_config_aneg,
2077 .read_status = &genphy_read_status, 2089 .read_status = &genphy_read_status,
2078 .ack_interrupt = &marvell_ack_interrupt, 2090 .ack_interrupt = &marvell_ack_interrupt,
2079 .config_intr = &marvell_config_intr, 2091 .config_intr = &marvell_config_intr,
diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c
index bfd3090fb055..07c6048200c6 100644
--- a/drivers/net/phy/mdio-xgene.c
+++ b/drivers/net/phy/mdio-xgene.c
@@ -194,8 +194,11 @@ static int xgene_mdio_reset(struct xgene_mdio_pdata *pdata)
194 } 194 }
195 195
196 ret = xgene_enet_ecc_init(pdata); 196 ret = xgene_enet_ecc_init(pdata);
197 if (ret) 197 if (ret) {
198 if (pdata->dev->of_node)
199 clk_disable_unprepare(pdata->clk);
198 return ret; 200 return ret;
201 }
199 xgene_gmac_reset(pdata); 202 xgene_gmac_reset(pdata);
200 203
201 return 0; 204 return 0;
@@ -388,8 +391,10 @@ static int xgene_mdio_probe(struct platform_device *pdev)
388 return ret; 391 return ret;
389 392
390 mdio_bus = mdiobus_alloc(); 393 mdio_bus = mdiobus_alloc();
391 if (!mdio_bus) 394 if (!mdio_bus) {
392 return -ENOMEM; 395 ret = -ENOMEM;
396 goto out_clk;
397 }
393 398
394 mdio_bus->name = "APM X-Gene MDIO bus"; 399 mdio_bus->name = "APM X-Gene MDIO bus";
395 400
@@ -418,7 +423,7 @@ static int xgene_mdio_probe(struct platform_device *pdev)
418 mdio_bus->phy_mask = ~0; 423 mdio_bus->phy_mask = ~0;
419 ret = mdiobus_register(mdio_bus); 424 ret = mdiobus_register(mdio_bus);
420 if (ret) 425 if (ret)
421 goto out; 426 goto out_mdiobus;
422 427
423 acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1, 428 acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1,
424 acpi_register_phy, NULL, mdio_bus, NULL); 429 acpi_register_phy, NULL, mdio_bus, NULL);
@@ -426,16 +431,20 @@ static int xgene_mdio_probe(struct platform_device *pdev)
426 } 431 }
427 432
428 if (ret) 433 if (ret)
429 goto out; 434 goto out_mdiobus;
430 435
431 pdata->mdio_bus = mdio_bus; 436 pdata->mdio_bus = mdio_bus;
432 xgene_mdio_status = true; 437 xgene_mdio_status = true;
433 438
434 return 0; 439 return 0;
435 440
436out: 441out_mdiobus:
437 mdiobus_free(mdio_bus); 442 mdiobus_free(mdio_bus);
438 443
444out_clk:
445 if (dev->of_node)
446 clk_disable_unprepare(pdata->clk);
447
439 return ret; 448 return ret;
440} 449}
441 450
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 19b9cc51079e..31f4b7911ef8 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2155,6 +2155,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
2155 } 2155 }
2156 2156
2157 ndst = &rt->dst; 2157 ndst = &rt->dst;
2158 if (skb_dst(skb)) {
2159 int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
2160
2161 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
2162 skb, mtu);
2163 }
2164
2158 tos = ip_tunnel_ecn_encap(tos, old_iph, skb); 2165 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
2159 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); 2166 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
2160 err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr), 2167 err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
@@ -2190,6 +2197,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
2190 goto out_unlock; 2197 goto out_unlock;
2191 } 2198 }
2192 2199
2200 if (skb_dst(skb)) {
2201 int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
2202
2203 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
2204 skb, mtu);
2205 }
2206
2193 tos = ip_tunnel_ecn_encap(tos, old_iph, skb); 2207 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
2194 ttl = ttl ? : ip6_dst_hoplimit(ndst); 2208 ttl = ttl ? : ip6_dst_hoplimit(ndst);
2195 skb_scrub_packet(skb, xnet); 2209 skb_scrub_packet(skb, xnet);
@@ -3103,6 +3117,11 @@ static void vxlan_config_apply(struct net_device *dev,
3103 3117
3104 max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : 3118 max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
3105 VXLAN_HEADROOM); 3119 VXLAN_HEADROOM);
3120 if (max_mtu < ETH_MIN_MTU)
3121 max_mtu = ETH_MIN_MTU;
3122
3123 if (!changelink && !conf->mtu)
3124 dev->mtu = max_mtu;
3106 } 3125 }
3107 3126
3108 if (dev->mtu > max_mtu) 3127 if (dev->mtu > max_mtu)
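
Editor's note: the vxlan_config_apply() hunk clamps the derived max_mtu to ETH_MIN_MTU and, on initial link creation without an explicit MTU, defaults dev->mtu to it. A small sketch of the arithmetic follows; the headroom values (Ethernet + IP/IPv6 + UDP + VXLAN) are assumptions for illustration, not taken from this patch.

/* Sketch of the MTU clamp added above; header sizes are assumed. */
#include <stdio.h>

#define ETH_MIN_MTU	68
#define VXLAN_HEADROOM	50	/* assumed: 14 eth + 20 ipv4 + 8 udp + 8 vxlan */
#define VXLAN6_HEADROOM	70	/* assumed: 14 eth + 40 ipv6 + 8 udp + 8 vxlan */

static int vxlan_max_mtu(int lower_mtu, int use_ipv6)
{
	int max_mtu = lower_mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);

	if (max_mtu < ETH_MIN_MTU)
		max_mtu = ETH_MIN_MTU;
	return max_mtu;
}

int main(void)
{
	printf("lower 1500, ipv4 -> %d\n", vxlan_max_mtu(1500, 0)); /* 1450 */
	printf("lower 1500, ipv6 -> %d\n", vxlan_max_mtu(1500, 1)); /* 1430 */
	printf("lower 100,  ipv6 -> %d\n", vxlan_max_mtu(100, 1));  /* clamped to 68 */
	return 0;
}
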
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 10b075a46b26..e8189c07b41f 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -684,6 +684,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
684 hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN); 684 hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN);
685 hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | 685 hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
686 IEEE80211_STYPE_NULLFUNC | 686 IEEE80211_STYPE_NULLFUNC |
687 IEEE80211_FCTL_TODS |
687 (ps ? IEEE80211_FCTL_PM : 0)); 688 (ps ? IEEE80211_FCTL_PM : 0));
688 hdr->duration_id = cpu_to_le16(0); 689 hdr->duration_id = cpu_to_le16(0);
689 memcpy(hdr->addr1, vp->bssid, ETH_ALEN); 690 memcpy(hdr->addr1, vp->bssid, ETH_ALEN);
@@ -3215,7 +3216,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info)
3215 if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info))) 3216 if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info)))
3216 continue; 3217 continue;
3217 3218
3218 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 3219 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
3219 if (!skb) { 3220 if (!skb) {
3220 res = -ENOMEM; 3221 res = -ENOMEM;
3221 goto out_err; 3222 goto out_err;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index e949e3302af4..c586bcdb5190 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
211 return ret; 211 return ret;
212} 212}
213 213
214static int btt_log_read_pair(struct arena_info *arena, u32 lane, 214static int btt_log_group_read(struct arena_info *arena, u32 lane,
215 struct log_entry *ent) 215 struct log_group *log)
216{ 216{
217 return arena_read_bytes(arena, 217 return arena_read_bytes(arena,
218 arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, 218 arena->logoff + (lane * LOG_GRP_SIZE), log,
219 2 * LOG_ENT_SIZE, 0); 219 LOG_GRP_SIZE, 0);
220} 220}
221 221
222static struct dentry *debugfs_root; 222static struct dentry *debugfs_root;
@@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
256 debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); 256 debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
257 debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); 257 debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
258 debugfs_create_x32("flags", S_IRUGO, d, &a->flags); 258 debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
259 debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
260 debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
259} 261}
260 262
261static void btt_debugfs_init(struct btt *btt) 263static void btt_debugfs_init(struct btt *btt)
@@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt)
274 } 276 }
275} 277}
276 278
279static u32 log_seq(struct log_group *log, int log_idx)
280{
281 return le32_to_cpu(log->ent[log_idx].seq);
282}
283
277/* 284/*
278 * This function accepts two log entries, and uses the 285 * This function accepts two log entries, and uses the
279 * sequence number to find the 'older' entry. 286 * sequence number to find the 'older' entry.
@@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt)
283 * 290 *
284 * TODO The logic feels a bit kludge-y. make it better.. 291 * TODO The logic feels a bit kludge-y. make it better..
285 */ 292 */
286static int btt_log_get_old(struct log_entry *ent) 293static int btt_log_get_old(struct arena_info *a, struct log_group *log)
287{ 294{
295 int idx0 = a->log_index[0];
296 int idx1 = a->log_index[1];
288 int old; 297 int old;
289 298
290 /* 299 /*
@@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent)
292 * the next time, the following logic works out to put this 301 * the next time, the following logic works out to put this
293 * (next) entry into [1] 302 * (next) entry into [1]
294 */ 303 */
295 if (ent[0].seq == 0) { 304 if (log_seq(log, idx0) == 0) {
296 ent[0].seq = cpu_to_le32(1); 305 log->ent[idx0].seq = cpu_to_le32(1);
297 return 0; 306 return 0;
298 } 307 }
299 308
300 if (ent[0].seq == ent[1].seq) 309 if (log_seq(log, idx0) == log_seq(log, idx1))
301 return -EINVAL; 310 return -EINVAL;
302 if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) 311 if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
303 return -EINVAL; 312 return -EINVAL;
304 313
305 if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { 314 if (log_seq(log, idx0) < log_seq(log, idx1)) {
306 if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) 315 if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
307 old = 0; 316 old = 0;
308 else 317 else
309 old = 1; 318 old = 1;
310 } else { 319 } else {
311 if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) 320 if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
312 old = 1; 321 old = 1;
313 else 322 else
314 old = 0; 323 old = 0;
@@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
328{ 337{
329 int ret; 338 int ret;
330 int old_ent, ret_ent; 339 int old_ent, ret_ent;
331 struct log_entry log[2]; 340 struct log_group log;
332 341
333 ret = btt_log_read_pair(arena, lane, log); 342 ret = btt_log_group_read(arena, lane, &log);
334 if (ret) 343 if (ret)
335 return -EIO; 344 return -EIO;
336 345
337 old_ent = btt_log_get_old(log); 346 old_ent = btt_log_get_old(arena, &log);
338 if (old_ent < 0 || old_ent > 1) { 347 if (old_ent < 0 || old_ent > 1) {
339 dev_err(to_dev(arena), 348 dev_err(to_dev(arena),
340 "log corruption (%d): lane %d seq [%d, %d]\n", 349 "log corruption (%d): lane %d seq [%d, %d]\n",
341 old_ent, lane, log[0].seq, log[1].seq); 350 old_ent, lane, log.ent[arena->log_index[0]].seq,
351 log.ent[arena->log_index[1]].seq);
342 /* TODO set error state? */ 352 /* TODO set error state? */
343 return -EIO; 353 return -EIO;
344 } 354 }
@@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
346 ret_ent = (old_flag ? old_ent : (1 - old_ent)); 356 ret_ent = (old_flag ? old_ent : (1 - old_ent));
347 357
348 if (ent != NULL) 358 if (ent != NULL)
349 memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); 359 memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
350 360
351 return ret_ent; 361 return ret_ent;
352} 362}
@@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
360 u32 sub, struct log_entry *ent, unsigned long flags) 370 u32 sub, struct log_entry *ent, unsigned long flags)
361{ 371{
362 int ret; 372 int ret;
363 /* 373 u32 group_slot = arena->log_index[sub];
364 * Ignore the padding in log_entry for calculating log_half. 374 unsigned int log_half = LOG_ENT_SIZE / 2;
365 * The entry is 'committed' when we write the sequence number,
366 * and we want to ensure that that is the last thing written.
367 * We don't bother writing the padding as that would be extra
368 * media wear and write amplification
369 */
370 unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
371 u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
372 void *src = ent; 375 void *src = ent;
376 u64 ns_off;
373 377
378 ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
379 (group_slot * LOG_ENT_SIZE);
374 /* split the 16B write into atomic, durable halves */ 380 /* split the 16B write into atomic, durable halves */
375 ret = arena_write_bytes(arena, ns_off, src, log_half, flags); 381 ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
376 if (ret) 382 if (ret)
@@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena)
453{ 459{
454 size_t logsize = arena->info2off - arena->logoff; 460 size_t logsize = arena->info2off - arena->logoff;
455 size_t chunk_size = SZ_4K, offset = 0; 461 size_t chunk_size = SZ_4K, offset = 0;
456 struct log_entry log; 462 struct log_entry ent;
457 void *zerobuf; 463 void *zerobuf;
458 int ret; 464 int ret;
459 u32 i; 465 u32 i;
@@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena)
485 } 491 }
486 492
487 for (i = 0; i < arena->nfree; i++) { 493 for (i = 0; i < arena->nfree; i++) {
488 log.lba = cpu_to_le32(i); 494 ent.lba = cpu_to_le32(i);
489 log.old_map = cpu_to_le32(arena->external_nlba + i); 495 ent.old_map = cpu_to_le32(arena->external_nlba + i);
490 log.new_map = cpu_to_le32(arena->external_nlba + i); 496 ent.new_map = cpu_to_le32(arena->external_nlba + i);
491 log.seq = cpu_to_le32(LOG_SEQ_INIT); 497 ent.seq = cpu_to_le32(LOG_SEQ_INIT);
492 ret = __btt_log_write(arena, i, 0, &log, 0); 498 ret = __btt_log_write(arena, i, 0, &ent, 0);
493 if (ret) 499 if (ret)
494 goto free; 500 goto free;
495 } 501 }
@@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena)
594 return 0; 600 return 0;
595} 601}
596 602
603static bool ent_is_padding(struct log_entry *ent)
604{
605 return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
606 && (ent->seq == 0);
607}
608
609/*
610 * Detecting valid log indices: We read a log group (see the comments in btt.h
611 * for a description of a 'log_group' and its 'slots'), and iterate over its
612 * four slots. We expect that a padding slot will be all-zeroes, and use this
613 * to detect a padding slot vs. an actual entry.
614 *
615 * If a log_group is in the initial state, i.e. hasn't been used since the
616 * creation of this BTT layout, it will have three of the four slots with
617 * zeroes. We skip over these log_groups for the detection of log_index. If
618 * all log_groups are in the initial state (i.e. the BTT has never been
619 * written to), it is safe to assume the 'new format' of log entries in slots
620 * (0, 1).
621 */
622static int log_set_indices(struct arena_info *arena)
623{
624 bool idx_set = false, initial_state = true;
625 int ret, log_index[2] = {-1, -1};
626 u32 i, j, next_idx = 0;
627 struct log_group log;
628 u32 pad_count = 0;
629
630 for (i = 0; i < arena->nfree; i++) {
631 ret = btt_log_group_read(arena, i, &log);
632 if (ret < 0)
633 return ret;
634
635 for (j = 0; j < 4; j++) {
636 if (!idx_set) {
637 if (ent_is_padding(&log.ent[j])) {
638 pad_count++;
639 continue;
640 } else {
641 /* Skip if index has been recorded */
642 if ((next_idx == 1) &&
643 (j == log_index[0]))
644 continue;
645 /* valid entry, record index */
646 log_index[next_idx] = j;
647 next_idx++;
648 }
649 if (next_idx == 2) {
650 /* two valid entries found */
651 idx_set = true;
652 } else if (next_idx > 2) {
653 /* too many valid indices */
654 return -ENXIO;
655 }
656 } else {
657 /*
658 * once the indices have been set, just verify
659 * that all subsequent log groups are either in
660 * their initial state or follow the same
661 * indices.
662 */
663 if (j == log_index[0]) {
664 /* entry must be 'valid' */
665 if (ent_is_padding(&log.ent[j]))
666 return -ENXIO;
667 } else if (j == log_index[1]) {
668 ;
669 /*
670 * log_index[1] can be padding if the
671 * lane never got used and it is still
672 * in the initial state (three 'padding'
673 * entries)
674 */
675 } else {
676 /* entry must be invalid (padding) */
677 if (!ent_is_padding(&log.ent[j]))
678 return -ENXIO;
679 }
680 }
681 }
682 /*
683 * If any of the log_groups have more than one valid,
684 * non-padding entry, then the we are no longer in the
685 * initial_state
686 */
687 if (pad_count < 3)
688 initial_state = false;
689 pad_count = 0;
690 }
691
692 if (!initial_state && !idx_set)
693 return -ENXIO;
694
695 /*
696 * If all the entries in the log were in the initial state,
697 * assume new padding scheme
698 */
699 if (initial_state)
700 log_index[1] = 1;
701
702 /*
703 * Only allow the known permutations of log/padding indices,
704 * i.e. (0, 1), and (0, 2)
705 */
706 if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
707 ; /* known index possibilities */
708 else {
709 dev_err(to_dev(arena), "Found an unknown padding scheme\n");
710 return -ENXIO;
711 }
712
713 arena->log_index[0] = log_index[0];
714 arena->log_index[1] = log_index[1];
715 dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
716 dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
717 return 0;
718}
719
597static int btt_rtt_init(struct arena_info *arena) 720static int btt_rtt_init(struct arena_info *arena)
598{ 721{
599 arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); 722 arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
@@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
650 available -= 2 * BTT_PG_SIZE; 773 available -= 2 * BTT_PG_SIZE;
651 774
652 /* The log takes a fixed amount of space based on nfree */ 775 /* The log takes a fixed amount of space based on nfree */
653 logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), 776 logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
654 BTT_PG_SIZE);
655 available -= logsize; 777 available -= logsize;
656 778
657 /* Calculate optimal split between map and data area */ 779 /* Calculate optimal split between map and data area */
@@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
668 arena->mapoff = arena->dataoff + datasize; 790 arena->mapoff = arena->dataoff + datasize;
669 arena->logoff = arena->mapoff + mapsize; 791 arena->logoff = arena->mapoff + mapsize;
670 arena->info2off = arena->logoff + logsize; 792 arena->info2off = arena->logoff + logsize;
793
794 /* Default log indices are (0,1) */
795 arena->log_index[0] = 0;
796 arena->log_index[1] = 1;
671 return arena; 797 return arena;
672} 798}
673 799
@@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt)
758 arena->external_lba_start = cur_nlba; 884 arena->external_lba_start = cur_nlba;
759 parse_arena_meta(arena, super, cur_off); 885 parse_arena_meta(arena, super, cur_off);
760 886
887 ret = log_set_indices(arena);
888 if (ret) {
889 dev_err(to_dev(arena),
890 "Unable to deduce log/padding indices\n");
891 goto out;
892 }
893
761 mutex_init(&arena->err_lock); 894 mutex_init(&arena->err_lock);
762 ret = btt_freelist_init(arena); 895 ret = btt_freelist_init(arena);
763 if (ret) 896 if (ret)
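
Editor's note: btt_log_get_old() above picks the older of the two valid slots from their sequence numbers, which cycle 1 -> 2 -> 3 -> 1. The sketch below models just that rule (ignoring the first-write initialisation special case); it is a userspace illustration, not the driver code.

/* Sketch of the "older entry" rule used by btt_log_get_old() above. */
#include <stdio.h>

static int old_of(unsigned s0, unsigned s1)
{
	if (s0 == s1 || s0 + s1 > 5)
		return -1;			/* corrupt pair */
	if (s0 < s1)
		return (s1 - s0 == 1) ? 0 : 1;	/* (1,2) -> 0, (1,3) -> 1 */
	return (s0 - s1 == 1) ? 1 : 0;		/* (2,1) -> 1, (3,1) -> 0 */
}

int main(void)
{
	printf("(1,2) old=%d\n", old_of(1, 2));	/* 0 */
	printf("(3,1) old=%d\n", old_of(3, 1));	/* 0: seq 1 is the wrap after 3 */
	printf("(2,2) old=%d\n", old_of(2, 2));	/* -1: corrupt */
	return 0;
}
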
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index 578c2057524d..db3cb6d4d0d4 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -27,6 +27,7 @@
27#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) 27#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
28#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) 28#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
29#define MAP_ENT_NORMAL 0xC0000000 29#define MAP_ENT_NORMAL 0xC0000000
30#define LOG_GRP_SIZE sizeof(struct log_group)
30#define LOG_ENT_SIZE sizeof(struct log_entry) 31#define LOG_ENT_SIZE sizeof(struct log_entry)
31#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ 32#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */
32#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ 33#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */
@@ -50,12 +51,52 @@ enum btt_init_state {
50 INIT_READY 51 INIT_READY
51}; 52};
52 53
54/*
55 * A log group represents one log 'lane', and consists of four log entries.
56 * Two of the four entries are valid entries, and the remaining two are
57 * padding. Due to an old bug in the padding location, we need to perform a
58 * test to determine the padding scheme being used, and use that scheme
59 * thereafter.
60 *
61 * In kernels prior to 4.15, 'log group' would have actual log entries at
 62 * indices (0, 2) and padding at indices (1, 3), whereas the correct/updated
63 * format has log entries at indices (0, 1) and padding at indices (2, 3).
64 *
65 * Old (pre 4.15) format:
66 * +-----------------+-----------------+
67 * | ent[0] | ent[1] |
68 * | 16B | 16B |
69 * | lba/old/new/seq | pad |
70 * +-----------------------------------+
71 * | ent[2] | ent[3] |
72 * | 16B | 16B |
73 * | lba/old/new/seq | pad |
74 * +-----------------+-----------------+
75 *
76 * New format:
77 * +-----------------+-----------------+
78 * | ent[0] | ent[1] |
79 * | 16B | 16B |
80 * | lba/old/new/seq | lba/old/new/seq |
81 * +-----------------------------------+
82 * | ent[2] | ent[3] |
83 * | 16B | 16B |
84 * | pad | pad |
85 * +-----------------+-----------------+
86 *
87 * We detect during start-up which format is in use, and set
88 * arena->log_index[(0, 1)] with the detected format.
89 */
90
53struct log_entry { 91struct log_entry {
54 __le32 lba; 92 __le32 lba;
55 __le32 old_map; 93 __le32 old_map;
56 __le32 new_map; 94 __le32 new_map;
57 __le32 seq; 95 __le32 seq;
58 __le64 padding[2]; 96};
97
98struct log_group {
99 struct log_entry ent[4];
59}; 100};
60 101
61struct btt_sb { 102struct btt_sb {
@@ -125,6 +166,8 @@ struct aligned_lock {
125 * @list: List head for list of arenas 166 * @list: List head for list of arenas
126 * @debugfs_dir: Debugfs dentry 167 * @debugfs_dir: Debugfs dentry
127 * @flags: Arena flags - may signify error states. 168 * @flags: Arena flags - may signify error states.
169 * @err_lock: Mutex for synchronizing error clearing.
170 * @log_index: Indices of the valid log entries in a log_group
128 * 171 *
129 * arena_info is a per-arena handle. Once an arena is narrowed down for an 172 * arena_info is a per-arena handle. Once an arena is narrowed down for an
130 * IO, this struct is passed around for the duration of the IO. 173 * IO, this struct is passed around for the duration of the IO.
@@ -157,6 +200,7 @@ struct arena_info {
157 /* Arena flags */ 200 /* Arena flags */
158 u32 flags; 201 u32 flags;
159 struct mutex err_lock; 202 struct mutex err_lock;
203 int log_index[2];
160}; 204};
161 205
162/** 206/**
@@ -176,6 +220,7 @@ struct arena_info {
176 * @init_lock: Mutex used for the BTT initialization 220 * @init_lock: Mutex used for the BTT initialization
177 * @init_state: Flag describing the initialization state for the BTT 221 * @init_state: Flag describing the initialization state for the BTT
178 * @num_arenas: Number of arenas in the BTT instance 222 * @num_arenas: Number of arenas in the BTT instance
223 * @phys_bb: Pointer to the namespace's badblocks structure
179 */ 224 */
180struct btt { 225struct btt {
181 struct gendisk *btt_disk; 226 struct gendisk *btt_disk;
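
Editor's note: the restructuring above does not change the per-lane media footprint — the old layout's two 32-byte padded entries occupy the same 64 bytes as the new log_group of four 16-byte slots; only which slots hold valid entries differs. The sketch below derives that from the structure sizes shown in the diff (the 32-byte figure assumes the removed __le64 padding[2]); it is an illustration, not driver code.

/* Sketch: per-lane offsets agree under the old and new log layouts. */
#include <stdio.h>
#include <stdint.h>

#define OLD_LOG_ENT_SIZE	32u	/* 4 x __le32 + 2 x __le64 padding */
#define LOG_ENT_SIZE		16u	/* 4 x __le32, no padding          */
#define LOG_GRP_SIZE		(4u * LOG_ENT_SIZE)

int main(void)
{
	uint64_t logoff = 0x1000, lane = 3;

	printf("old offset (lane 3): %llu\n",
	       (unsigned long long)(logoff + 2 * lane * OLD_LOG_ENT_SIZE));
	printf("new offset (lane 3): %llu\n",
	       (unsigned long long)(logoff + lane * LOG_GRP_SIZE));
	/* both print 4096 + 192 = 4288 */
	return 0;
}
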
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 65cc171c721d..2adada1a5855 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
364int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) 364int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
365{ 365{
366 u64 checksum, offset; 366 u64 checksum, offset;
367 unsigned long align;
368 enum nd_pfn_mode mode; 367 enum nd_pfn_mode mode;
369 struct nd_namespace_io *nsio; 368 struct nd_namespace_io *nsio;
369 unsigned long align, start_pad;
370 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 370 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
371 struct nd_namespace_common *ndns = nd_pfn->ndns; 371 struct nd_namespace_common *ndns = nd_pfn->ndns;
372 const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); 372 const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
@@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
410 410
411 align = le32_to_cpu(pfn_sb->align); 411 align = le32_to_cpu(pfn_sb->align);
412 offset = le64_to_cpu(pfn_sb->dataoff); 412 offset = le64_to_cpu(pfn_sb->dataoff);
413 start_pad = le32_to_cpu(pfn_sb->start_pad);
413 if (align == 0) 414 if (align == 0)
414 align = 1UL << ilog2(offset); 415 align = 1UL << ilog2(offset);
415 mode = le32_to_cpu(pfn_sb->mode); 416 mode = le32_to_cpu(pfn_sb->mode);
@@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
468 return -EBUSY; 469 return -EBUSY;
469 } 470 }
470 471
471 if ((align && !IS_ALIGNED(offset, align)) 472 if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
472 || !IS_ALIGNED(offset, PAGE_SIZE)) { 473 || !IS_ALIGNED(offset, PAGE_SIZE)) {
473 dev_err(&nd_pfn->dev, 474 dev_err(&nd_pfn->dev,
474 "bad offset: %#llx dax disabled align: %#lx\n", 475 "bad offset: %#llx dax disabled align: %#lx\n",
@@ -582,6 +583,12 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
582 return altmap; 583 return altmap;
583} 584}
584 585
586static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
587{
588 return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
589 ALIGN_DOWN(phys, nd_pfn->align));
590}
591
585static int nd_pfn_init(struct nd_pfn *nd_pfn) 592static int nd_pfn_init(struct nd_pfn *nd_pfn)
586{ 593{
587 u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; 594 u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
@@ -637,13 +644,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
637 start = nsio->res.start; 644 start = nsio->res.start;
638 size = PHYS_SECTION_ALIGN_UP(start + size) - start; 645 size = PHYS_SECTION_ALIGN_UP(start + size) - start;
639 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, 646 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
640 IORES_DESC_NONE) == REGION_MIXED) { 647 IORES_DESC_NONE) == REGION_MIXED
648 || !IS_ALIGNED(start + resource_size(&nsio->res),
649 nd_pfn->align)) {
641 size = resource_size(&nsio->res); 650 size = resource_size(&nsio->res);
642 end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size); 651 end_trunc = start + size - phys_pmem_align_down(nd_pfn,
652 start + size);
643 } 653 }
644 654
645 if (start_pad + end_trunc) 655 if (start_pad + end_trunc)
646 dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n", 656 dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
647 dev_name(&ndns->dev), start_pad + end_trunc); 657 dev_name(&ndns->dev), start_pad + end_trunc);
648 658
649 /* 659 /*
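
Editor's note: phys_pmem_align_down() above truncates the namespace end to the smaller of two round-downs — to the memory-hotplug section boundary and to the namespace alignment. A sketch of that arithmetic follows; the 128 MiB section size is an assumption for illustration (typical on x86-64), not part of the patch.

/* Sketch of phys_pmem_align_down(); section size is assumed. */
#include <stdio.h>
#include <stdint.h>

#define ALIGN_DOWN(x, a)	((x) & ~((uint64_t)(a) - 1))	/* a: power of two */
#define SECTION_SIZE		(128ull << 20)

static uint64_t phys_pmem_align_down(uint64_t phys, uint64_t ns_align)
{
	uint64_t sect = ALIGN_DOWN(phys, SECTION_SIZE);
	uint64_t nsal = ALIGN_DOWN(phys, ns_align);

	return sect < nsal ? sect : nsal;
}

int main(void)
{
	/* namespace ends at 4 GiB + 200 MiB, namespace alignment is 1 GiB */
	uint64_t end = (4ull << 30) + (200ull << 20);

	printf("%#llx -> %#llx\n", (unsigned long long)end,
	       (unsigned long long)phys_pmem_align_down(end, 1ull << 30));
	/* rounds down to 4 GiB: the 1 GiB alignment is the stricter bound here */
	return 0;
}
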
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f837d666cbd4..1e46e60b8f10 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1287,7 +1287,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
1287 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < 1287 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
1288 NVME_DSM_MAX_RANGES); 1288 NVME_DSM_MAX_RANGES);
1289 1289
1290 queue->limits.discard_alignment = size; 1290 queue->limits.discard_alignment = 0;
1291 queue->limits.discard_granularity = size; 1291 queue->limits.discard_granularity = size;
1292 1292
1293 blk_queue_max_discard_sectors(queue, UINT_MAX); 1293 blk_queue_max_discard_sectors(queue, UINT_MAX);
@@ -1705,7 +1705,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
1705 blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); 1705 blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
1706 blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); 1706 blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
1707 } 1707 }
1708 if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) 1708 if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
1709 is_power_of_2(ctrl->max_hw_sectors))
1709 blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); 1710 blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
1710 blk_queue_virt_boundary(q, ctrl->page_size - 1); 1711 blk_queue_virt_boundary(q, ctrl->page_size - 1);
1711 if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) 1712 if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
@@ -2869,7 +2870,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2869 2870
2870 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); 2871 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
2871 nvme_set_queue_limits(ctrl, ns->queue); 2872 nvme_set_queue_limits(ctrl, ns->queue);
2872 nvme_setup_streams_ns(ctrl, ns);
2873 2873
2874 id = nvme_identify_ns(ctrl, nsid); 2874 id = nvme_identify_ns(ctrl, nsid);
2875 if (!id) 2875 if (!id)
@@ -2880,6 +2880,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2880 2880
2881 if (nvme_init_ns_head(ns, nsid, id, &new)) 2881 if (nvme_init_ns_head(ns, nsid, id, &new))
2882 goto out_free_id; 2882 goto out_free_id;
2883 nvme_setup_streams_ns(ctrl, ns);
2883 2884
2884#ifdef CONFIG_NVME_MULTIPATH 2885#ifdef CONFIG_NVME_MULTIPATH
2885 /* 2886 /*
@@ -2965,8 +2966,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
2965 return; 2966 return;
2966 2967
2967 if (ns->disk && ns->disk->flags & GENHD_FL_UP) { 2968 if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
2968 if (blk_get_integrity(ns->disk))
2969 blk_integrity_unregister(ns->disk);
2970 nvme_mpath_remove_disk_links(ns); 2969 nvme_mpath_remove_disk_links(ns);
2971 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, 2970 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
2972 &nvme_ns_id_attr_group); 2971 &nvme_ns_id_attr_group);
@@ -2974,6 +2973,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
2974 nvme_nvm_unregister_sysfs(ns); 2973 nvme_nvm_unregister_sysfs(ns);
2975 del_gendisk(ns->disk); 2974 del_gendisk(ns->disk);
2976 blk_cleanup_queue(ns->queue); 2975 blk_cleanup_queue(ns->queue);
2976 if (blk_get_integrity(ns->disk))
2977 blk_integrity_unregister(ns->disk);
2977 } 2978 }
2978 2979
2979 mutex_lock(&ns->ctrl->subsys->lock); 2980 mutex_lock(&ns->ctrl->subsys->lock);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0a8af4daef89..794e66e4aa20 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3221,7 +3221,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
3221 3221
3222 /* initiate nvme ctrl ref counting teardown */ 3222 /* initiate nvme ctrl ref counting teardown */
3223 nvme_uninit_ctrl(&ctrl->ctrl); 3223 nvme_uninit_ctrl(&ctrl->ctrl);
3224 nvme_put_ctrl(&ctrl->ctrl);
3225 3224
3226 /* Remove core ctrl ref. */ 3225 /* Remove core ctrl ref. */
3227 nvme_put_ctrl(&ctrl->ctrl); 3226 nvme_put_ctrl(&ctrl->ctrl);
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
index a25fed52f7e9..41b740aed3a3 100644
--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -1692,3 +1692,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask)
1692 iounmap(base_addr); 1692 iounmap(base_addr);
1693} 1693}
1694 1694
1695
1696/*
1697 * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
1698 * seems rushed, so that many built-in components simply don't work.
1699 * The following quirks disable the serial AUX port and the built-in ATI RV100
1700 * Radeon 7000 graphics card, neither of which has any external connectors and
1701 * thus both are useless; worse, the AUX port occupies ttyS0, which makes
1702 * those machines the only PARISC machines on which we can't use
1703 * ttyS0 as the boot console.
1704 */
1705static void quirk_diva_ati_card(struct pci_dev *dev)
1706{
1707 if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
1708 dev->subsystem_device != 0x1292)
1709 return;
1710
1711 dev_info(&dev->dev, "Hiding Diva built-in ATI card");
1712 dev->device = 0;
1713}
1714DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
1715 quirk_diva_ati_card);
1716
1717static void quirk_diva_aux_disable(struct pci_dev *dev)
1718{
1719 if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
1720 dev->subsystem_device != 0x1291)
1721 return;
1722
1723 dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
1724 dev->device = 0;
1725}
1726DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
1727 quirk_diva_aux_disable);
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 945099d49f8f..14fd865a5120 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1012,7 +1012,12 @@ static int pci_pm_thaw_noirq(struct device *dev)
1012 if (pci_has_legacy_pm_support(pci_dev)) 1012 if (pci_has_legacy_pm_support(pci_dev))
1013 return pci_legacy_resume_early(dev); 1013 return pci_legacy_resume_early(dev);
1014 1014
1015 pci_update_current_state(pci_dev, PCI_D0); 1015 /*
1016 * pci_restore_state() requires the device to be in D0 (because of MSI
1017 * restoration among other things), so force it into D0 in case the
1018 * driver's "freeze" callbacks put it into a low-power state directly.
1019 */
1020 pci_set_power_state(pci_dev, PCI_D0);
1016 pci_restore_state(pci_dev); 1021 pci_restore_state(pci_dev);
1017 1022
1018 if (drv && drv->pm && drv->pm->thaw_noirq) 1023 if (drv && drv->pm && drv->pm->thaw_noirq)
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index bdedb6325c72..4471fd94e1fe 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
1620 clear_bit(i, chip->irq.valid_mask); 1620 clear_bit(i, chip->irq.valid_mask);
1621 } 1621 }
1622 1622
1623 /*
1624 * The same set of machines in chv_no_valid_mask[] have incorrectly
1625 * configured GPIOs that generate spurious interrupts so we use
1626 * this same list to apply another quirk for them.
1627 *
1628 * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953.
1629 */
1630 if (!need_valid_mask) {
1631 /*
1632 * Mask all interrupts the community is able to generate
1633 * but leave the ones that can only generate GPEs unmasked.
1634 */
1635 chv_writel(GENMASK(31, pctrl->community->nirqs),
1636 pctrl->regs + CHV_INTMASK);
1637 }
1638
1623 /* Clear all interrupts */ 1639 /* Clear all interrupts */
1624 chv_writel(0xffff, pctrl->regs + CHV_INTSTAT); 1640 chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
1625 1641
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 6c815207f4f5..3614df68830f 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5386,6 +5386,13 @@ out:
5386} 5386}
5387EXPORT_SYMBOL_GPL(qeth_poll); 5387EXPORT_SYMBOL_GPL(qeth_poll);
5388 5388
5389static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd)
5390{
5391 if (!cmd->hdr.return_code)
5392 cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
5393 return cmd->hdr.return_code;
5394}
5395
5389int qeth_setassparms_cb(struct qeth_card *card, 5396int qeth_setassparms_cb(struct qeth_card *card,
5390 struct qeth_reply *reply, unsigned long data) 5397 struct qeth_reply *reply, unsigned long data)
5391{ 5398{
@@ -6242,7 +6249,7 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card,
6242 (struct qeth_checksum_cmd *)reply->param; 6249 (struct qeth_checksum_cmd *)reply->param;
6243 6250
6244 QETH_CARD_TEXT(card, 4, "chkdoccb"); 6251 QETH_CARD_TEXT(card, 4, "chkdoccb");
6245 if (cmd->hdr.return_code) 6252 if (qeth_setassparms_inspect_rc(cmd))
6246 return 0; 6253 return 0;
6247 6254
6248 memset(chksum_cb, 0, sizeof(*chksum_cb)); 6255 memset(chksum_cb, 0, sizeof(*chksum_cb));
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 6e3d81969a77..d52265416da2 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1725,6 +1725,7 @@ struct aac_dev
1725#define FIB_CONTEXT_FLAG_NATIVE_HBA (0x00000010) 1725#define FIB_CONTEXT_FLAG_NATIVE_HBA (0x00000010)
1726#define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF (0x00000020) 1726#define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF (0x00000020)
1727#define FIB_CONTEXT_FLAG_SCSI_CMD (0x00000040) 1727#define FIB_CONTEXT_FLAG_SCSI_CMD (0x00000040)
1728#define FIB_CONTEXT_FLAG_EH_RESET (0x00000080)
1728 1729
1729/* 1730/*
1730 * Define the command values 1731 * Define the command values
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index bdf127aaab41..d55332de08f9 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -1037,7 +1037,7 @@ static int aac_eh_bus_reset(struct scsi_cmnd* cmd)
1037 info = &aac->hba_map[bus][cid]; 1037 info = &aac->hba_map[bus][cid];
1038 if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS || 1038 if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
1039 info->devtype != AAC_DEVTYPE_NATIVE_RAW) { 1039 info->devtype != AAC_DEVTYPE_NATIVE_RAW) {
1040 fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT; 1040 fib->flags |= FIB_CONTEXT_FLAG_EH_RESET;
1041 cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER; 1041 cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
1042 } 1042 }
1043 } 1043 }
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index a4f28b7e4c65..e18877177f1b 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
1576 return req; 1576 return req;
1577 1577
1578 for_each_bio(bio) { 1578 for_each_bio(bio) {
1579 ret = blk_rq_append_bio(req, bio); 1579 struct bio *bounce_bio = bio;
1580
1581 ret = blk_rq_append_bio(req, &bounce_bio);
1580 if (ret) 1582 if (ret)
1581 return ERR_PTR(ret); 1583 return ERR_PTR(ret);
1582 } 1584 }
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index 449ef5adbb2b..dfb8da83fa50 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -374,10 +374,8 @@ int scsi_dev_info_list_add_keyed(int compatible, char *vendor, char *model,
374 model, compatible); 374 model, compatible);
375 375
376 if (strflags) 376 if (strflags)
377 devinfo->flags = simple_strtoul(strflags, NULL, 0); 377 flags = (__force blist_flags_t)simple_strtoul(strflags, NULL, 0);
378 else 378 devinfo->flags = flags;
379 devinfo->flags = flags;
380
381 devinfo->compatible = compatible; 379 devinfo->compatible = compatible;
382 380
383 if (compatible) 381 if (compatible)
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index be5e919db0e8..0880d975eed3 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -770,7 +770,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
770 * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized 770 * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
771 **/ 771 **/
772static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, 772static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
773 int *bflags, int async) 773 blist_flags_t *bflags, int async)
774{ 774{
775 int ret; 775 int ret;
776 776
@@ -1049,14 +1049,15 @@ static unsigned char *scsi_inq_str(unsigned char *buf, unsigned char *inq,
1049 * - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized 1049 * - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
1050 **/ 1050 **/
1051static int scsi_probe_and_add_lun(struct scsi_target *starget, 1051static int scsi_probe_and_add_lun(struct scsi_target *starget,
1052 u64 lun, int *bflagsp, 1052 u64 lun, blist_flags_t *bflagsp,
1053 struct scsi_device **sdevp, 1053 struct scsi_device **sdevp,
1054 enum scsi_scan_mode rescan, 1054 enum scsi_scan_mode rescan,
1055 void *hostdata) 1055 void *hostdata)
1056{ 1056{
1057 struct scsi_device *sdev; 1057 struct scsi_device *sdev;
1058 unsigned char *result; 1058 unsigned char *result;
1059 int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256; 1059 blist_flags_t bflags;
1060 int res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
1060 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); 1061 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
1061 1062
1062 /* 1063 /*
@@ -1201,7 +1202,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
1201 * Modifies sdevscan->lun. 1202 * Modifies sdevscan->lun.
1202 **/ 1203 **/
1203static void scsi_sequential_lun_scan(struct scsi_target *starget, 1204static void scsi_sequential_lun_scan(struct scsi_target *starget,
1204 int bflags, int scsi_level, 1205 blist_flags_t bflags, int scsi_level,
1205 enum scsi_scan_mode rescan) 1206 enum scsi_scan_mode rescan)
1206{ 1207{
1207 uint max_dev_lun; 1208 uint max_dev_lun;
@@ -1292,7 +1293,7 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget,
1292 * 0: scan completed (or no memory, so further scanning is futile) 1293 * 0: scan completed (or no memory, so further scanning is futile)
1293 * 1: could not scan with REPORT LUN 1294 * 1: could not scan with REPORT LUN
1294 **/ 1295 **/
1295static int scsi_report_lun_scan(struct scsi_target *starget, int bflags, 1296static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflags,
1296 enum scsi_scan_mode rescan) 1297 enum scsi_scan_mode rescan)
1297{ 1298{
1298 unsigned char scsi_cmd[MAX_COMMAND_SIZE]; 1299 unsigned char scsi_cmd[MAX_COMMAND_SIZE];
@@ -1538,7 +1539,7 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
1538 unsigned int id, u64 lun, enum scsi_scan_mode rescan) 1539 unsigned int id, u64 lun, enum scsi_scan_mode rescan)
1539{ 1540{
1540 struct Scsi_Host *shost = dev_to_shost(parent); 1541 struct Scsi_Host *shost = dev_to_shost(parent);
1541 int bflags = 0; 1542 blist_flags_t bflags = 0;
1542 int res; 1543 int res;
1543 struct scsi_target *starget; 1544 struct scsi_target *starget;
1544 1545
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 50e7d7e4a861..a9996c16f4ae 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -967,7 +967,8 @@ sdev_show_wwid(struct device *dev, struct device_attribute *attr,
967} 967}
968static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL); 968static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL);
969 969
970#define BLIST_FLAG_NAME(name) [ilog2(BLIST_##name)] = #name 970#define BLIST_FLAG_NAME(name) \
971 [ilog2((__force unsigned int)BLIST_##name)] = #name
971static const char *const sdev_bflags_name[] = { 972static const char *const sdev_bflags_name[] = {
972#include "scsi_devinfo_tbl.c" 973#include "scsi_devinfo_tbl.c"
973}; 974};
@@ -984,7 +985,7 @@ sdev_show_blacklist(struct device *dev, struct device_attribute *attr,
984 for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) { 985 for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) {
985 const char *name = NULL; 986 const char *name = NULL;
986 987
987 if (!(sdev->sdev_bflags & BIT(i))) 988 if (!(sdev->sdev_bflags & (__force blist_flags_t)BIT(i)))
988 continue; 989 continue;
989 if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i]) 990 if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i])
990 name = sdev_bflags_name[i]; 991 name = sdev_bflags_name[i];
diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index d0219e36080c..10ebb213ddb3 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -50,14 +50,14 @@
50 50
51/* Our blacklist flags */ 51/* Our blacklist flags */
52enum { 52enum {
53 SPI_BLIST_NOIUS = 0x1, 53 SPI_BLIST_NOIUS = (__force blist_flags_t)0x1,
54}; 54};
55 55
56/* blacklist table, modelled on scsi_devinfo.c */ 56/* blacklist table, modelled on scsi_devinfo.c */
57static struct { 57static struct {
58 char *vendor; 58 char *vendor;
59 char *model; 59 char *model;
60 unsigned flags; 60 blist_flags_t flags;
61} spi_static_device_list[] __initdata = { 61} spi_static_device_list[] __initdata = {
62 {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS }, 62 {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS },
63 {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS }, 63 {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS },
@@ -221,9 +221,11 @@ static int spi_device_configure(struct transport_container *tc,
221{ 221{
222 struct scsi_device *sdev = to_scsi_device(dev); 222 struct scsi_device *sdev = to_scsi_device(dev);
223 struct scsi_target *starget = sdev->sdev_target; 223 struct scsi_target *starget = sdev->sdev_target;
224 unsigned bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8], 224 blist_flags_t bflags;
225 &sdev->inquiry[16], 225
226 SCSI_DEVINFO_SPI); 226 bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
227 &sdev->inquiry[16],
228 SCSI_DEVINFO_SPI);
227 229
228 /* Populate the target capability fields with the values 230 /* Populate the target capability fields with the values
229 * gleaned from the device inquiry */ 231 * gleaned from the device inquiry */
diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c
index 77fe55ce790c..d65345312527 100644
--- a/drivers/spi/spi-armada-3700.c
+++ b/drivers/spi/spi-armada-3700.c
@@ -79,6 +79,7 @@
79#define A3700_SPI_BYTE_LEN BIT(5) 79#define A3700_SPI_BYTE_LEN BIT(5)
80#define A3700_SPI_CLK_PRESCALE BIT(0) 80#define A3700_SPI_CLK_PRESCALE BIT(0)
81#define A3700_SPI_CLK_PRESCALE_MASK (0x1f) 81#define A3700_SPI_CLK_PRESCALE_MASK (0x1f)
82#define A3700_SPI_CLK_EVEN_OFFS (0x10)
82 83
83#define A3700_SPI_WFIFO_THRS_BIT 28 84#define A3700_SPI_WFIFO_THRS_BIT 28
84#define A3700_SPI_RFIFO_THRS_BIT 24 85#define A3700_SPI_RFIFO_THRS_BIT 24
@@ -220,6 +221,13 @@ static void a3700_spi_clock_set(struct a3700_spi *a3700_spi,
220 221
221 prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz); 222 prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz);
222 223
224 /* For prescaler values over 15, we can only set it by steps of 2.
225 * Starting from A3700_SPI_CLK_EVEN_OFFS, we set values from 0 up to
226 * 30. We only use this range from 16 to 30.
227 */
228 if (prescale > 15)
229 prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
230
223 val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG); 231 val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
224 val = val & ~A3700_SPI_CLK_PRESCALE_MASK; 232 val = val & ~A3700_SPI_CLK_PRESCALE_MASK;
225 233
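
Below is a minimal, self-contained C sketch of the prescaler encoding added in the hunk above, assuming a 200 MHz source clock for the worked values; the helper names and the example frequencies are illustrative and not part of the patch.

	#include <stdio.h>

	#define CLK_EVEN_OFFS		0x10	/* selects the "steps of two" prescaler range */
	#define CLK_PRESCALE_MASK	0x1f	/* 5-bit field, encoded values run 0..31 */

	static unsigned int div_round_up(unsigned int n, unsigned int d)
	{
		return (n + d - 1) / d;
	}

	static unsigned int a3700_prescale_field(unsigned int clk_hz, unsigned int speed_hz)
	{
		unsigned int prescale = div_round_up(clk_hz, speed_hz);

		/*
		 * Divisors up to 15 are programmed directly; larger ones can only
		 * be generated in steps of two, by setting the 0x10 offset bit and
		 * programming half the divisor (usable raw divisors: 16..30).
		 */
		if (prescale > 15)
			prescale = CLK_EVEN_OFFS + div_round_up(prescale, 2);

		return prescale & CLK_PRESCALE_MASK;
	}

	int main(void)
	{
		printf("%#x\n", a3700_prescale_field(200000000, 20000000)); /* 0xa: programmed directly */
		printf("%#x\n", a3700_prescale_field(200000000, 10000000)); /* 0x1a: 0x10 + 20/2 */
		return 0;
	}
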
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index f95da364c283..669470971023 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -1661,12 +1661,12 @@ static int atmel_spi_remove(struct platform_device *pdev)
1661 pm_runtime_get_sync(&pdev->dev); 1661 pm_runtime_get_sync(&pdev->dev);
1662 1662
1663 /* reset the hardware and block queue progress */ 1663 /* reset the hardware and block queue progress */
1664 spin_lock_irq(&as->lock);
1665 if (as->use_dma) { 1664 if (as->use_dma) {
1666 atmel_spi_stop_dma(master); 1665 atmel_spi_stop_dma(master);
1667 atmel_spi_release_dma(master); 1666 atmel_spi_release_dma(master);
1668 } 1667 }
1669 1668
1669 spin_lock_irq(&as->lock);
1670 spi_writel(as, CR, SPI_BIT(SWRST)); 1670 spi_writel(as, CR, SPI_BIT(SWRST));
1671 spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */ 1671 spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
1672 spi_readl(as, SR); 1672 spi_readl(as, SR);
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 2ce875764ca6..0835a8d88fb8 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -377,8 +377,8 @@ static int qspi_set_config_register(struct rspi_data *rspi, int access_size)
377 /* Sets SPCMD */ 377 /* Sets SPCMD */
378 rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0); 378 rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0);
379 379
380 /* Enables SPI function in master mode */ 380 /* Sets RSPI mode */
381 rspi_write8(rspi, SPCR_SPE | SPCR_MSTR, RSPI_SPCR); 381 rspi_write8(rspi, SPCR_MSTR, RSPI_SPCR);
382 382
383 return 0; 383 return 0;
384} 384}
diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index c5cd635c28f3..41410031f8e9 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -525,7 +525,7 @@ err_free_master:
525 525
526static int sun4i_spi_remove(struct platform_device *pdev) 526static int sun4i_spi_remove(struct platform_device *pdev)
527{ 527{
528 pm_runtime_disable(&pdev->dev); 528 pm_runtime_force_suspend(&pdev->dev);
529 529
530 return 0; 530 return 0;
531} 531}
diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c
index bc7100b93dfc..e0b9fe1d0e37 100644
--- a/drivers/spi/spi-xilinx.c
+++ b/drivers/spi/spi-xilinx.c
@@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
271 while (remaining_words) { 271 while (remaining_words) {
272 int n_words, tx_words, rx_words; 272 int n_words, tx_words, rx_words;
273 u32 sr; 273 u32 sr;
274 int stalled;
274 275
275 n_words = min(remaining_words, xspi->buffer_size); 276 n_words = min(remaining_words, xspi->buffer_size);
276 277
@@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
299 300
300 /* Read out all the data from the Rx FIFO */ 301 /* Read out all the data from the Rx FIFO */
301 rx_words = n_words; 302 rx_words = n_words;
303 stalled = 10;
302 while (rx_words) { 304 while (rx_words) {
305 if (rx_words == n_words && !(stalled--) &&
306 !(sr & XSPI_SR_TX_EMPTY_MASK) &&
307 (sr & XSPI_SR_RX_EMPTY_MASK)) {
308 dev_err(&spi->dev,
309 "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n");
310 xspi_init_hw(xspi);
311 return -EIO;
312 }
313
303 if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) { 314 if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) {
304 xilinx_spi_rx(xspi); 315 xilinx_spi_rx(xspi);
305 rx_words--; 316 rx_words--;
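
The stall check added above bounds how long the transfer loop will spin when the controller wedges with an empty RX FIFO while the TX FIFO has not drained. A standalone sketch of the same pattern, with toy FIFO state standing in for the real status register (all names here are illustrative):

	#include <stdio.h>
	#include <errno.h>
	#include <stdbool.h>

	struct fake_fifo {
		bool tx_empty;
		bool rx_empty;
		int rx_avail;
	};

	static int drain_rx(struct fake_fifo *f, int n_words)
	{
		int rx_words = n_words;
		int stalled = 10;	/* poll budget before declaring a stall */

		while (rx_words) {
			/* no progress at all, TX stuck and RX empty: likely wedged */
			if (rx_words == n_words && !(stalled--) &&
			    !f->tx_empty && f->rx_empty) {
				fprintf(stderr, "stall detected, resetting controller\n");
				return -EIO;
			}
			if (f->rx_avail > 0) {
				f->rx_avail--;
				rx_words--;
			}
		}
		return 0;
	}

	int main(void)
	{
		struct fake_fifo wedged = { .tx_empty = false, .rx_empty = true, .rx_avail = 0 };

		/* returns -EIO once the poll budget is exhausted instead of spinning forever */
		printf("%d\n", drain_rx(&wedged, 4));
		return 0;
	}
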
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 7c69b4a9694d..0d99b242e82e 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
920 " %d i: %d bio: %p, allocating another" 920 " %d i: %d bio: %p, allocating another"
921 " bio\n", bio->bi_vcnt, i, bio); 921 " bio\n", bio->bi_vcnt, i, bio);
922 922
923 rc = blk_rq_append_bio(req, bio); 923 rc = blk_rq_append_bio(req, &bio);
924 if (rc) { 924 if (rc) {
925 pr_err("pSCSI: failed to append bio\n"); 925 pr_err("pSCSI: failed to append bio\n");
926 goto fail; 926 goto fail;
@@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
938 } 938 }
939 939
940 if (bio) { 940 if (bio) {
941 rc = blk_rq_append_bio(req, bio); 941 rc = blk_rq_append_bio(req, &bio);
942 if (rc) { 942 if (rc) {
943 pr_err("pSCSI: failed to append bio\n"); 943 pr_err("pSCSI: failed to append bio\n");
944 goto fail; 944 goto fail;
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index f77e499afddd..065f0b607373 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -257,10 +257,25 @@ static void release_memory_resource(struct resource *resource)
257 kfree(resource); 257 kfree(resource);
258} 258}
259 259
260/*
261 * Host memory not allocated to dom0. We can use this range for hotplug-based
262 * ballooning.
263 *
264 * It's a type-less resource. Setting IORESOURCE_MEM will make resource
265 * management algorithms (arch_remove_reservations()) look into guest e820,
266 * which we don't want.
267 */
268static struct resource hostmem_resource = {
269 .name = "Host RAM",
270};
271
272void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res)
273{}
274
260static struct resource *additional_memory_resource(phys_addr_t size) 275static struct resource *additional_memory_resource(phys_addr_t size)
261{ 276{
262 struct resource *res; 277 struct resource *res, *res_hostmem;
263 int ret; 278 int ret = -ENOMEM;
264 279
265 res = kzalloc(sizeof(*res), GFP_KERNEL); 280 res = kzalloc(sizeof(*res), GFP_KERNEL);
266 if (!res) 281 if (!res)
@@ -269,13 +284,42 @@ static struct resource *additional_memory_resource(phys_addr_t size)
269 res->name = "System RAM"; 284 res->name = "System RAM";
270 res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; 285 res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
271 286
272 ret = allocate_resource(&iomem_resource, res, 287 res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL);
273 size, 0, -1, 288 if (res_hostmem) {
274 PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); 289 /* Try to grab a range from hostmem */
275 if (ret < 0) { 290 res_hostmem->name = "Host memory";
276 pr_err("Cannot allocate new System RAM resource\n"); 291 ret = allocate_resource(&hostmem_resource, res_hostmem,
277 kfree(res); 292 size, 0, -1,
278 return NULL; 293 PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
294 }
295
296 if (!ret) {
297 /*
298 * Insert this resource into iomem. Because hostmem_resource
299 * tracks portion of guest e820 marked as UNUSABLE noone else
300 * should try to use it.
301 */
302 res->start = res_hostmem->start;
303 res->end = res_hostmem->end;
304 ret = insert_resource(&iomem_resource, res);
305 if (ret < 0) {
306 pr_err("Can't insert iomem_resource [%llx - %llx]\n",
307 res->start, res->end);
308 release_memory_resource(res_hostmem);
309 res_hostmem = NULL;
310 res->start = res->end = 0;
311 }
312 }
313
314 if (ret) {
315 ret = allocate_resource(&iomem_resource, res,
316 size, 0, -1,
317 PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
318 if (ret < 0) {
319 pr_err("Cannot allocate new System RAM resource\n");
320 kfree(res);
321 return NULL;
322 }
279 } 323 }
280 324
281#ifdef CONFIG_SPARSEMEM 325#ifdef CONFIG_SPARSEMEM
@@ -287,6 +331,7 @@ static struct resource *additional_memory_resource(phys_addr_t size)
287 pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n", 331 pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
288 pfn, limit); 332 pfn, limit);
289 release_memory_resource(res); 333 release_memory_resource(res);
334 release_memory_resource(res_hostmem);
290 return NULL; 335 return NULL;
291 } 336 }
292 } 337 }
@@ -765,6 +810,8 @@ static int __init balloon_init(void)
765 set_online_page_callback(&xen_online_page); 810 set_online_page_callback(&xen_online_page);
766 register_memory_notifier(&xen_memory_nb); 811 register_memory_notifier(&xen_memory_nb);
767 register_sysctl_table(xen_root); 812 register_sysctl_table(xen_root);
813
814 arch_xen_balloon_init(&hostmem_resource);
768#endif 815#endif
769 816
770#ifdef CONFIG_XEN_PV 817#ifdef CONFIG_XEN_PV
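
The balloon change above first tries to carve hotplugged ranges out of a dedicated host-memory resource and only falls back to a plain iomem allocation when that fails. A toy, self-contained version of that two-step strategy (kernel resource APIs replaced by stand-ins, all addresses invented) could look like:

	#include <stdio.h>
	#include <stdbool.h>

	struct range { unsigned long start, end; };

	/* pretend pool of host memory not assigned to the guest */
	static struct range hostmem = { 0x100000000UL, 0x140000000UL };

	static bool alloc_from_hostmem(unsigned long size, struct range *out)
	{
		if (hostmem.end - hostmem.start < size)
			return false;
		out->start = hostmem.start;
		out->end = hostmem.start + size - 1;
		hostmem.start += size;
		return true;
	}

	static bool alloc_fallback(unsigned long size, struct range *out)
	{
		/* stand-in for allocate_resource() against the whole iomem tree */
		static unsigned long next = 0x200000000UL;

		out->start = next;
		out->end = next + size - 1;
		next += size;
		return true;
	}

	static struct range add_memory(unsigned long size)
	{
		struct range r;

		/* preferred pool first, generic allocation only as a fallback */
		if (!alloc_from_hostmem(size, &r) && !alloc_fallback(size, &r))
			r.start = r.end = 0;
		return r;
	}

	int main(void)
	{
		struct range r = add_memory(0x8000000UL);	/* 128 MiB */

		printf("[%lx - %lx]\n", r.start, r.end);
		return 0;
	}
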
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 0da80019a917..83ed7715f856 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -702,7 +702,7 @@ xfs_alloc_ag_vextent(
702 ASSERT(args->agbno % args->alignment == 0); 702 ASSERT(args->agbno % args->alignment == 0);
703 703
704 /* if not file data, insert new block into the reverse map btree */ 704 /* if not file data, insert new block into the reverse map btree */
705 if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { 705 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
706 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, 706 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
707 args->agbno, args->len, &args->oinfo); 707 args->agbno, args->len, &args->oinfo);
708 if (error) 708 if (error)
@@ -1682,7 +1682,7 @@ xfs_free_ag_extent(
1682 bno_cur = cnt_cur = NULL; 1682 bno_cur = cnt_cur = NULL;
1683 mp = tp->t_mountp; 1683 mp = tp->t_mountp;
1684 1684
1685 if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { 1685 if (!xfs_rmap_should_skip_owner_update(oinfo)) {
1686 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); 1686 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1687 if (error) 1687 if (error)
1688 goto error0; 1688 goto error0;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 6249c92671de..a76914db72ef 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -212,6 +212,7 @@ xfs_attr_set(
212 int flags) 212 int flags)
213{ 213{
214 struct xfs_mount *mp = dp->i_mount; 214 struct xfs_mount *mp = dp->i_mount;
215 struct xfs_buf *leaf_bp = NULL;
215 struct xfs_da_args args; 216 struct xfs_da_args args;
216 struct xfs_defer_ops dfops; 217 struct xfs_defer_ops dfops;
217 struct xfs_trans_res tres; 218 struct xfs_trans_res tres;
@@ -327,9 +328,16 @@ xfs_attr_set(
327 * GROT: another possible req'mt for a double-split btree op. 328 * GROT: another possible req'mt for a double-split btree op.
328 */ 329 */
329 xfs_defer_init(args.dfops, args.firstblock); 330 xfs_defer_init(args.dfops, args.firstblock);
330 error = xfs_attr_shortform_to_leaf(&args); 331 error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
331 if (error) 332 if (error)
332 goto out_defer_cancel; 333 goto out_defer_cancel;
334 /*
335 * Prevent the leaf buffer from being unlocked so that a
336 * concurrent AIL push cannot grab the half-baked leaf
337 * buffer and run into problems with the write verifier.
338 */
339 xfs_trans_bhold(args.trans, leaf_bp);
340 xfs_defer_bjoin(args.dfops, leaf_bp);
333 xfs_defer_ijoin(args.dfops, dp); 341 xfs_defer_ijoin(args.dfops, dp);
334 error = xfs_defer_finish(&args.trans, args.dfops); 342 error = xfs_defer_finish(&args.trans, args.dfops);
335 if (error) 343 if (error)
@@ -337,13 +345,14 @@ xfs_attr_set(
337 345
338 /* 346 /*
339 * Commit the leaf transformation. We'll need another (linked) 347 * Commit the leaf transformation. We'll need another (linked)
340 * transaction to add the new attribute to the leaf. 348 * transaction to add the new attribute to the leaf, which
349 * means that we have to hold & join the leaf buffer here too.
341 */ 350 */
342
343 error = xfs_trans_roll_inode(&args.trans, dp); 351 error = xfs_trans_roll_inode(&args.trans, dp);
344 if (error) 352 if (error)
345 goto out; 353 goto out;
346 354 xfs_trans_bjoin(args.trans, leaf_bp);
355 leaf_bp = NULL;
347 } 356 }
348 357
349 if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) 358 if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -374,8 +383,9 @@ xfs_attr_set(
374 383
375out_defer_cancel: 384out_defer_cancel:
376 xfs_defer_cancel(&dfops); 385 xfs_defer_cancel(&dfops);
377 args.trans = NULL;
378out: 386out:
387 if (leaf_bp)
388 xfs_trans_brelse(args.trans, leaf_bp);
379 if (args.trans) 389 if (args.trans)
380 xfs_trans_cancel(args.trans); 390 xfs_trans_cancel(args.trans);
381 xfs_iunlock(dp, XFS_ILOCK_EXCL); 391 xfs_iunlock(dp, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 53cc8b986eac..601eaa36f1ad 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
735} 735}
736 736
737/* 737/*
738 * Convert from using the shortform to the leaf. 738 * Convert from using the shortform to the leaf. On success, return the
739 * buffer so that we can keep it locked until we're totally done with it.
739 */ 740 */
740int 741int
741xfs_attr_shortform_to_leaf(xfs_da_args_t *args) 742xfs_attr_shortform_to_leaf(
743 struct xfs_da_args *args,
744 struct xfs_buf **leaf_bp)
742{ 745{
743 xfs_inode_t *dp; 746 xfs_inode_t *dp;
744 xfs_attr_shortform_t *sf; 747 xfs_attr_shortform_t *sf;
@@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
818 sfe = XFS_ATTR_SF_NEXTENTRY(sfe); 821 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
819 } 822 }
820 error = 0; 823 error = 0;
821 824 *leaf_bp = bp;
822out: 825out:
823 kmem_free(tmpbuffer); 826 kmem_free(tmpbuffer);
824 return error; 827 return error;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index f7dda0c237b0..894124efb421 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -48,7 +48,8 @@ void xfs_attr_shortform_create(struct xfs_da_args *args);
48void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); 48void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
49int xfs_attr_shortform_lookup(struct xfs_da_args *args); 49int xfs_attr_shortform_lookup(struct xfs_da_args *args);
50int xfs_attr_shortform_getvalue(struct xfs_da_args *args); 50int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
51int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); 51int xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
52 struct xfs_buf **leaf_bp);
52int xfs_attr_shortform_remove(struct xfs_da_args *args); 53int xfs_attr_shortform_remove(struct xfs_da_args *args);
53int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); 54int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
54int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); 55int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 1210f684d3c2..1bddbba6b80c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5136,7 +5136,7 @@ __xfs_bunmapi(
5136 * blowing out the transaction with a mix of EFIs and reflink 5136 * blowing out the transaction with a mix of EFIs and reflink
5137 * adjustments. 5137 * adjustments.
5138 */ 5138 */
5139 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) 5139 if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
5140 max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); 5140 max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
5141 else 5141 else
5142 max_len = len; 5142 max_len = len;
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 072ebfe1d6ae..087fea02c389 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -249,6 +249,10 @@ xfs_defer_trans_roll(
249 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) 249 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
250 xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); 250 xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
251 251
252 /* Hold the (previously bjoin'd) buffer locked across the roll. */
253 for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
254 xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
255
252 trace_xfs_defer_trans_roll((*tp)->t_mountp, dop); 256 trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
253 257
254 /* Roll the transaction. */ 258 /* Roll the transaction. */
@@ -264,6 +268,12 @@ xfs_defer_trans_roll(
264 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) 268 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
265 xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); 269 xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
266 270
271 /* Rejoin the buffers and dirty them so the log moves forward. */
272 for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
273 xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
274 xfs_trans_bhold(*tp, dop->dop_bufs[i]);
275 }
276
267 return error; 277 return error;
268} 278}
269 279
@@ -295,6 +305,31 @@ xfs_defer_ijoin(
295 } 305 }
296 } 306 }
297 307
308 ASSERT(0);
309 return -EFSCORRUPTED;
310}
311
312/*
313 * Add this buffer to the deferred op. Each joined buffer is relogged
314 * each time we roll the transaction.
315 */
316int
317xfs_defer_bjoin(
318 struct xfs_defer_ops *dop,
319 struct xfs_buf *bp)
320{
321 int i;
322
323 for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
324 if (dop->dop_bufs[i] == bp)
325 return 0;
326 else if (dop->dop_bufs[i] == NULL) {
327 dop->dop_bufs[i] = bp;
328 return 0;
329 }
330 }
331
332 ASSERT(0);
298 return -EFSCORRUPTED; 333 return -EFSCORRUPTED;
299} 334}
300 335
@@ -493,9 +528,7 @@ xfs_defer_init(
493 struct xfs_defer_ops *dop, 528 struct xfs_defer_ops *dop,
494 xfs_fsblock_t *fbp) 529 xfs_fsblock_t *fbp)
495{ 530{
496 dop->dop_committed = false; 531 memset(dop, 0, sizeof(struct xfs_defer_ops));
497 dop->dop_low = false;
498 memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
499 *fbp = NULLFSBLOCK; 532 *fbp = NULLFSBLOCK;
500 INIT_LIST_HEAD(&dop->dop_intake); 533 INIT_LIST_HEAD(&dop->dop_intake);
501 INIT_LIST_HEAD(&dop->dop_pending); 534 INIT_LIST_HEAD(&dop->dop_pending);
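
The new xfs_defer_bjoin() above follows the same fixed-slot registration pattern as the existing inode join: a small array of pointers, an idempotent insert, and an error when every slot is taken. A self-contained sketch of that pattern (names outside the pattern itself are illustrative):

	#include <stdio.h>
	#include <stddef.h>

	#define NR_BUFS	2

	struct defer_ops {
		void *bufs[NR_BUFS];	/* relogged with each transaction roll */
	};

	static int defer_bjoin(struct defer_ops *dop, void *bp)
	{
		int i;

		for (i = 0; i < NR_BUFS; i++) {
			if (dop->bufs[i] == bp)
				return 0;		/* already joined */
			if (dop->bufs[i] == NULL) {
				dop->bufs[i] = bp;
				return 0;
			}
		}
		return -1;	/* no free slot: the caller treats this as corruption */
	}

	int main(void)
	{
		struct defer_ops dop = { { NULL, NULL } };
		int a, b, c;

		printf("%d\n", defer_bjoin(&dop, &a));	/* 0 */
		printf("%d\n", defer_bjoin(&dop, &a));	/* 0, idempotent */
		printf("%d\n", defer_bjoin(&dop, &b));	/* 0 */
		printf("%d\n", defer_bjoin(&dop, &c));	/* -1, both slots used */
		return 0;
	}
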
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index d4f046dd44bd..045beacdd37d 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
59}; 59};
60 60
61#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ 61#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
62#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */
62 63
63struct xfs_defer_ops { 64struct xfs_defer_ops {
64 bool dop_committed; /* did any trans commit? */ 65 bool dop_committed; /* did any trans commit? */
@@ -66,8 +67,9 @@ struct xfs_defer_ops {
66 struct list_head dop_intake; /* unlogged pending work */ 67 struct list_head dop_intake; /* unlogged pending work */
67 struct list_head dop_pending; /* logged pending work */ 68 struct list_head dop_pending; /* logged pending work */
68 69
69 /* relog these inodes with each roll */ 70 /* relog these with each roll */
70 struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES]; 71 struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES];
72 struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS];
71}; 73};
72 74
73void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, 75void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);
77void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp); 79void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
78bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop); 80bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
79int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip); 81int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
82int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
80 83
81/* Description of a deferred type. */ 84/* Description of a deferred type. */
82struct xfs_defer_op_type { 85struct xfs_defer_op_type {
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 89bf16b4d937..b0f31791c7e6 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -632,8 +632,6 @@ xfs_iext_insert(
632 struct xfs_iext_leaf *new = NULL; 632 struct xfs_iext_leaf *new = NULL;
633 int nr_entries, i; 633 int nr_entries, i;
634 634
635 trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
636
637 if (ifp->if_height == 0) 635 if (ifp->if_height == 0)
638 xfs_iext_alloc_root(ifp, cur); 636 xfs_iext_alloc_root(ifp, cur);
639 else if (ifp->if_height == 1) 637 else if (ifp->if_height == 1)
@@ -661,6 +659,8 @@ xfs_iext_insert(
661 xfs_iext_set(cur_rec(cur), irec); 659 xfs_iext_set(cur_rec(cur), irec);
662 ifp->if_bytes += sizeof(struct xfs_iext_rec); 660 ifp->if_bytes += sizeof(struct xfs_iext_rec);
663 661
662 trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
663
664 if (new) 664 if (new)
665 xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); 665 xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
666} 666}
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 585b35d34142..c40d26763075 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc(
1488 xfs_extlen_t aglen, 1488 xfs_extlen_t aglen,
1489 struct xfs_defer_ops *dfops) 1489 struct xfs_defer_ops *dfops)
1490{ 1490{
1491 int error;
1492
1493 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, 1491 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
1494 agbno, aglen); 1492 agbno, aglen);
1495 1493
1496 /* Add refcount btree reservation */ 1494 /* Add refcount btree reservation */
1497 error = xfs_refcount_adjust_cow(rcur, agbno, aglen, 1495 return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1498 XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); 1496 XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
1499 if (error)
1500 return error;
1501
1502 /* Add rmap entry */
1503 if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
1504 error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
1505 rcur->bc_private.a.agno,
1506 agbno, aglen, XFS_RMAP_OWN_COW);
1507 if (error)
1508 return error;
1509 }
1510
1511 return error;
1512} 1497}
1513 1498
1514/* 1499/*
@@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free(
1521 xfs_extlen_t aglen, 1506 xfs_extlen_t aglen,
1522 struct xfs_defer_ops *dfops) 1507 struct xfs_defer_ops *dfops)
1523{ 1508{
1524 int error;
1525
1526 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, 1509 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
1527 agbno, aglen); 1510 agbno, aglen);
1528 1511
1529 /* Remove refcount btree reservation */ 1512 /* Remove refcount btree reservation */
1530 error = xfs_refcount_adjust_cow(rcur, agbno, aglen, 1513 return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1531 XFS_REFCOUNT_ADJUST_COW_FREE, dfops); 1514 XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
1532 if (error)
1533 return error;
1534
1535 /* Remove rmap entry */
1536 if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
1537 error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
1538 rcur->bc_private.a.agno,
1539 agbno, aglen, XFS_RMAP_OWN_COW);
1540 if (error)
1541 return error;
1542 }
1543
1544 return error;
1545} 1515}
1546 1516
1547/* Record a CoW staging extent in the refcount btree. */ 1517/* Record a CoW staging extent in the refcount btree. */
@@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent(
1552 xfs_fsblock_t fsb, 1522 xfs_fsblock_t fsb,
1553 xfs_extlen_t len) 1523 xfs_extlen_t len)
1554{ 1524{
1525 int error;
1526
1555 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1527 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1556 return 0; 1528 return 0;
1557 1529
1558 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, 1530 error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
1559 fsb, len); 1531 fsb, len);
1532 if (error)
1533 return error;
1534
1535 /* Add rmap entry */
1536 return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
1537 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1560} 1538}
1561 1539
1562/* Forget a CoW staging event in the refcount btree. */ 1540/* Forget a CoW staging event in the refcount btree. */
@@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent(
1567 xfs_fsblock_t fsb, 1545 xfs_fsblock_t fsb,
1568 xfs_extlen_t len) 1546 xfs_extlen_t len)
1569{ 1547{
1548 int error;
1549
1570 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1550 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1571 return 0; 1551 return 0;
1572 1552
1553 /* Remove rmap entry */
1554 error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
1555 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1556 if (error)
1557 return error;
1558
1573 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, 1559 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
1574 fsb, len); 1560 fsb, len);
1575} 1561}
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index dd019cee1b3b..50db920ceeeb 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -368,6 +368,51 @@ xfs_rmap_lookup_le_range(
368} 368}
369 369
370/* 370/*
371 * Perform all the relevant owner checks for a removal op. If we're doing an
372 * unknown-owner removal then we have no owner information to check.
373 */
374static int
375xfs_rmap_free_check_owner(
376 struct xfs_mount *mp,
377 uint64_t ltoff,
378 struct xfs_rmap_irec *rec,
379 xfs_fsblock_t bno,
380 xfs_filblks_t len,
381 uint64_t owner,
382 uint64_t offset,
383 unsigned int flags)
384{
385 int error = 0;
386
387 if (owner == XFS_RMAP_OWN_UNKNOWN)
388 return 0;
389
390 /* Make sure the unwritten flag matches. */
391 XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
392 (rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
393
394 /* Make sure the owner matches what we expect to find in the tree. */
395 XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
396
397 /* Check the offset, if necessary. */
398 if (XFS_RMAP_NON_INODE_OWNER(owner))
399 goto out;
400
401 if (flags & XFS_RMAP_BMBT_BLOCK) {
402 XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
403 out);
404 } else {
405 XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
406 XFS_WANT_CORRUPTED_GOTO(mp,
407 ltoff + rec->rm_blockcount >= offset + len,
408 out);
409 }
410
411out:
412 return error;
413}
414
415/*
371 * Find the extent in the rmap btree and remove it. 416 * Find the extent in the rmap btree and remove it.
372 * 417 *
373 * The record we find should always be an exact match for the extent that we're 418 * The record we find should always be an exact match for the extent that we're
@@ -444,33 +489,40 @@ xfs_rmap_unmap(
444 goto out_done; 489 goto out_done;
445 } 490 }
446 491
447 /* Make sure the unwritten flag matches. */ 492 /*
448 XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == 493 * If we're doing an unknown-owner removal for EFI recovery, we expect
449 (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); 494 * to find the full range in the rmapbt or nothing at all. If we
495 * don't find any rmaps overlapping either end of the range, we're
496 * done. Hopefully this means that the EFI creator already queued
497 * (and finished) a RUI to remove the rmap.
498 */
499 if (owner == XFS_RMAP_OWN_UNKNOWN &&
500 ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
501 struct xfs_rmap_irec rtrec;
502
503 error = xfs_btree_increment(cur, 0, &i);
504 if (error)
505 goto out_error;
506 if (i == 0)
507 goto out_done;
508 error = xfs_rmap_get_rec(cur, &rtrec, &i);
509 if (error)
510 goto out_error;
511 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
512 if (rtrec.rm_startblock >= bno + len)
513 goto out_done;
514 }
450 515
451 /* Make sure the extent we found covers the entire freeing range. */ 516 /* Make sure the extent we found covers the entire freeing range. */
452 XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && 517 XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
453 ltrec.rm_startblock + ltrec.rm_blockcount >= 518 ltrec.rm_startblock + ltrec.rm_blockcount >=
454 bno + len, out_error); 519 bno + len, out_error);
455 520
456 /* Make sure the owner matches what we expect to find in the tree. */ 521 /* Check owner information. */
457 XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner || 522 error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
458 XFS_RMAP_NON_INODE_OWNER(owner), out_error); 523 offset, flags);
459 524 if (error)
460 /* Check the offset, if necessary. */ 525 goto out_error;
461 if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
462 if (flags & XFS_RMAP_BMBT_BLOCK) {
463 XFS_WANT_CORRUPTED_GOTO(mp,
464 ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
465 out_error);
466 } else {
467 XFS_WANT_CORRUPTED_GOTO(mp,
468 ltrec.rm_offset <= offset, out_error);
469 XFS_WANT_CORRUPTED_GOTO(mp,
470 ltoff + ltrec.rm_blockcount >= offset + len,
471 out_error);
472 }
473 }
474 526
475 if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { 527 if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
476 /* exact match, simply remove the record from rmap tree */ 528 /* exact match, simply remove the record from rmap tree */
@@ -664,6 +716,7 @@ xfs_rmap_map(
664 flags |= XFS_RMAP_UNWRITTEN; 716 flags |= XFS_RMAP_UNWRITTEN;
665 trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, 717 trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
666 unwritten, oinfo); 718 unwritten, oinfo);
719 ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
667 720
668 /* 721 /*
669 * For the initial lookup, look for an exact match or the left-adjacent 722 * For the initial lookup, look for an exact match or the left-adjacent
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 466ede637080..0fcd5b1ba729 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -61,7 +61,21 @@ static inline void
61xfs_rmap_skip_owner_update( 61xfs_rmap_skip_owner_update(
62 struct xfs_owner_info *oi) 62 struct xfs_owner_info *oi)
63{ 63{
64 oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; 64 xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
65}
66
67static inline bool
68xfs_rmap_should_skip_owner_update(
69 struct xfs_owner_info *oi)
70{
71 return oi->oi_owner == XFS_RMAP_OWN_NULL;
72}
73
74static inline void
75xfs_rmap_any_owner_update(
76 struct xfs_owner_info *oi)
77{
78 xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
65} 79}
66 80
67/* Reverse mapping functions. */ 81/* Reverse mapping functions. */
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 44f8c5451210..64da90655e95 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -538,7 +538,7 @@ xfs_efi_recover(
538 return error; 538 return error;
539 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); 539 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
540 540
541 xfs_rmap_skip_owner_update(&oinfo); 541 xfs_rmap_any_owner_update(&oinfo);
542 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 542 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
543 extp = &efip->efi_format.efi_extents[i]; 543 extp = &efip->efi_format.efi_extents[i];
544 error = xfs_trans_free_extent(tp, efdp, extp->ext_start, 544 error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8f22fc579dbb..60a2e128cb6a 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -571,6 +571,11 @@ xfs_growfs_data_private(
571 * this doesn't actually exist in the rmap btree. 571 * this doesn't actually exist in the rmap btree.
572 */ 572 */
573 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL); 573 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
574 error = xfs_rmap_free(tp, bp, agno,
575 be32_to_cpu(agf->agf_length) - new,
576 new, &oinfo);
577 if (error)
578 goto error0;
574 error = xfs_free_extent(tp, 579 error = xfs_free_extent(tp,
575 XFS_AGB_TO_FSB(mp, agno, 580 XFS_AGB_TO_FSB(mp, agno,
576 be32_to_cpu(agf->agf_length) - new), 581 be32_to_cpu(agf->agf_length) - new),
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 43005fbe8b1e..3861d61fb265 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -870,7 +870,7 @@ xfs_eofblocks_worker(
870 * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default). 870 * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
871 * (We'll just piggyback on the post-EOF prealloc space workqueue.) 871 * (We'll just piggyback on the post-EOF prealloc space workqueue.)
872 */ 872 */
873STATIC void 873void
874xfs_queue_cowblocks( 874xfs_queue_cowblocks(
875 struct xfs_mount *mp) 875 struct xfs_mount *mp)
876{ 876{
@@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks(
1536 return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks); 1536 return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
1537} 1537}
1538 1538
1539static inline unsigned long
1540xfs_iflag_for_tag(
1541 int tag)
1542{
1543 switch (tag) {
1544 case XFS_ICI_EOFBLOCKS_TAG:
1545 return XFS_IEOFBLOCKS;
1546 case XFS_ICI_COWBLOCKS_TAG:
1547 return XFS_ICOWBLOCKS;
1548 default:
1549 ASSERT(0);
1550 return 0;
1551 }
1552}
1553
1539static void 1554static void
1540__xfs_inode_set_eofblocks_tag( 1555__xfs_inode_set_blocks_tag(
1541 xfs_inode_t *ip, 1556 xfs_inode_t *ip,
1542 void (*execute)(struct xfs_mount *mp), 1557 void (*execute)(struct xfs_mount *mp),
1543 void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, 1558 void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag(
1552 * Don't bother locking the AG and looking up in the radix trees 1567 * Don't bother locking the AG and looking up in the radix trees
1553 * if we already know that we have the tag set. 1568 * if we already know that we have the tag set.
1554 */ 1569 */
1555 if (ip->i_flags & XFS_IEOFBLOCKS) 1570 if (ip->i_flags & xfs_iflag_for_tag(tag))
1556 return; 1571 return;
1557 spin_lock(&ip->i_flags_lock); 1572 spin_lock(&ip->i_flags_lock);
1558 ip->i_flags |= XFS_IEOFBLOCKS; 1573 ip->i_flags |= xfs_iflag_for_tag(tag);
1559 spin_unlock(&ip->i_flags_lock); 1574 spin_unlock(&ip->i_flags_lock);
1560 1575
1561 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 1576 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag(
1587 xfs_inode_t *ip) 1602 xfs_inode_t *ip)
1588{ 1603{
1589 trace_xfs_inode_set_eofblocks_tag(ip); 1604 trace_xfs_inode_set_eofblocks_tag(ip);
1590 return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks, 1605 return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,
1591 trace_xfs_perag_set_eofblocks, 1606 trace_xfs_perag_set_eofblocks,
1592 XFS_ICI_EOFBLOCKS_TAG); 1607 XFS_ICI_EOFBLOCKS_TAG);
1593} 1608}
1594 1609
1595static void 1610static void
1596__xfs_inode_clear_eofblocks_tag( 1611__xfs_inode_clear_blocks_tag(
1597 xfs_inode_t *ip, 1612 xfs_inode_t *ip,
1598 void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, 1613 void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
1599 int error, unsigned long caller_ip), 1614 int error, unsigned long caller_ip),
@@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag(
1603 struct xfs_perag *pag; 1618 struct xfs_perag *pag;
1604 1619
1605 spin_lock(&ip->i_flags_lock); 1620 spin_lock(&ip->i_flags_lock);
1606 ip->i_flags &= ~XFS_IEOFBLOCKS; 1621 ip->i_flags &= ~xfs_iflag_for_tag(tag);
1607 spin_unlock(&ip->i_flags_lock); 1622 spin_unlock(&ip->i_flags_lock);
1608 1623
1609 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 1624 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag(
1630 xfs_inode_t *ip) 1645 xfs_inode_t *ip)
1631{ 1646{
1632 trace_xfs_inode_clear_eofblocks_tag(ip); 1647 trace_xfs_inode_clear_eofblocks_tag(ip);
1633 return __xfs_inode_clear_eofblocks_tag(ip, 1648 return __xfs_inode_clear_blocks_tag(ip,
1634 trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG); 1649 trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
1635} 1650}
1636 1651
@@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag(
1724 xfs_inode_t *ip) 1739 xfs_inode_t *ip)
1725{ 1740{
1726 trace_xfs_inode_set_cowblocks_tag(ip); 1741 trace_xfs_inode_set_cowblocks_tag(ip);
1727 return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, 1742 return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,
1728 trace_xfs_perag_set_cowblocks, 1743 trace_xfs_perag_set_cowblocks,
1729 XFS_ICI_COWBLOCKS_TAG); 1744 XFS_ICI_COWBLOCKS_TAG);
1730} 1745}
@@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag(
1734 xfs_inode_t *ip) 1749 xfs_inode_t *ip)
1735{ 1750{
1736 trace_xfs_inode_clear_cowblocks_tag(ip); 1751 trace_xfs_inode_clear_cowblocks_tag(ip);
1737 return __xfs_inode_clear_eofblocks_tag(ip, 1752 return __xfs_inode_clear_blocks_tag(ip,
1738 trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG); 1753 trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
1739} 1754}
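
The xfs_iflag_for_tag() helper introduced above maps a radix-tree tag to the matching in-core inode flag so that one set of tagging helpers can serve both the EOFBLOCKS and COWBLOCKS cases. A self-contained sketch of that mapping plus the cheap pre-lock check; the COWBLOCKS bit follows the xfs_inode.h hunk later in this patch, while the EOFBLOCKS bit, tag values and the rest are placeholders:

	#include <stdio.h>

	#define ICI_EOFBLOCKS_TAG	0
	#define ICI_COWBLOCKS_TAG	1

	#define IEOFBLOCKS	(1UL << 8)	/* placeholder bit for illustration */
	#define ICOWBLOCKS	(1UL << 12)	/* matches XFS_ICOWBLOCKS in the patch */

	static unsigned long iflag_for_tag(int tag)
	{
		switch (tag) {
		case ICI_EOFBLOCKS_TAG:
			return IEOFBLOCKS;
		case ICI_COWBLOCKS_TAG:
			return ICOWBLOCKS;
		default:
			return 0;
		}
	}

	struct inode_flags { unsigned long i_flags; };

	static void set_blocks_tag(struct inode_flags *ip, int tag)
	{
		if (ip->i_flags & iflag_for_tag(tag))
			return;		/* cheap check before taking any locks */
		ip->i_flags |= iflag_for_tag(tag);
		/* ...per-AG radix-tree tagging and workqueue kick would go here... */
	}

	int main(void)
	{
		struct inode_flags ip = { 0 };

		set_blocks_tag(&ip, ICI_COWBLOCKS_TAG);
		printf("%#lx\n", ip.i_flags);		/* 0x1000 */
		return 0;
	}
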
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index bff4d85e5498..d4a77588eca1 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
81int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *); 81int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);
82int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip); 82int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
83void xfs_cowblocks_worker(struct work_struct *); 83void xfs_cowblocks_worker(struct work_struct *);
84void xfs_queue_cowblocks(struct xfs_mount *);
84 85
85int xfs_inode_ag_iterator(struct xfs_mount *mp, 86int xfs_inode_ag_iterator(struct xfs_mount *mp,
86 int (*execute)(struct xfs_inode *ip, int flags, void *args), 87 int (*execute)(struct xfs_inode *ip, int flags, void *args),
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b41952a4ddd8..6f95bdb408ce 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1487,6 +1487,24 @@ xfs_link(
1487 return error; 1487 return error;
1488} 1488}
1489 1489
1490/* Clear the reflink flag and the cowblocks tag if possible. */
1491static void
1492xfs_itruncate_clear_reflink_flags(
1493 struct xfs_inode *ip)
1494{
1495 struct xfs_ifork *dfork;
1496 struct xfs_ifork *cfork;
1497
1498 if (!xfs_is_reflink_inode(ip))
1499 return;
1500 dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1501 cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
1502 if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
1503 ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
1504 if (cfork->if_bytes == 0)
1505 xfs_inode_clear_cowblocks_tag(ip);
1506}
1507
1490/* 1508/*
1491 * Free up the underlying blocks past new_size. The new size must be smaller 1509 * Free up the underlying blocks past new_size. The new size must be smaller
1492 * than the current size. This routine can be used both for the attribute and 1510 * than the current size. This routine can be used both for the attribute and
@@ -1583,15 +1601,7 @@ xfs_itruncate_extents(
1583 if (error) 1601 if (error)
1584 goto out; 1602 goto out;
1585 1603
1586 /* 1604 xfs_itruncate_clear_reflink_flags(ip);
1587 * Clear the reflink flag if there are no data fork blocks and
1588 * there are no extents staged in the cow fork.
1589 */
1590 if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
1591 if (ip->i_d.di_nblocks == 0)
1592 ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
1593 xfs_inode_clear_cowblocks_tag(ip);
1594 }
1595 1605
1596 /* 1606 /*
1597 * Always re-log the inode so that our permanent transaction can keep 1607 * Always re-log the inode so that our permanent transaction can keep
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index b2136af9289f..d383e392ec9d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
232 * log recovery to replay a bmap operation on the inode. 232 * log recovery to replay a bmap operation on the inode.
233 */ 233 */
234#define XFS_IRECOVERY (1 << 11) 234#define XFS_IRECOVERY (1 << 11)
235#define XFS_ICOWBLOCKS (1 << 12)/* has the cowblocks tag set */
235 236
236/* 237/*
237 * Per-lifetime flags need to be reset when re-using a reclaimable inode during 238 * Per-lifetime flags need to be reset when re-using a reclaimable inode during
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index cf7c8f81bebb..47aea2e82c26 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -454,6 +454,8 @@ retry:
454 if (error) 454 if (error)
455 goto out_bmap_cancel; 455 goto out_bmap_cancel;
456 456
457 xfs_inode_set_cowblocks_tag(ip);
458
457 /* Finish up. */ 459 /* Finish up. */
458 error = xfs_defer_finish(&tp, &dfops); 460 error = xfs_defer_finish(&tp, &dfops);
459 if (error) 461 if (error)
@@ -490,8 +492,9 @@ xfs_reflink_find_cow_mapping(
490 struct xfs_iext_cursor icur; 492 struct xfs_iext_cursor icur;
491 493
492 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); 494 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
493 ASSERT(xfs_is_reflink_inode(ip));
494 495
496 if (!xfs_is_reflink_inode(ip))
497 return false;
495 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 498 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
496 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got)) 499 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
497 return false; 500 return false;
@@ -610,6 +613,9 @@ xfs_reflink_cancel_cow_blocks(
610 613
611 /* Remove the mapping from the CoW fork. */ 614 /* Remove the mapping from the CoW fork. */
612 xfs_bmap_del_extent_cow(ip, &icur, &got, &del); 615 xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
616 } else {
617 /* Didn't do anything, push cursor back. */
618 xfs_iext_prev(ifp, &icur);
613 } 619 }
614next_extent: 620next_extent:
615 if (!xfs_iext_get_extent(ifp, &icur, &got)) 621 if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -725,7 +731,7 @@ xfs_reflink_end_cow(
725 (unsigned int)(end_fsb - offset_fsb), 731 (unsigned int)(end_fsb - offset_fsb),
726 XFS_DATA_FORK); 732 XFS_DATA_FORK);
727 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, 733 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
728 resblks, 0, 0, &tp); 734 resblks, 0, XFS_TRANS_RESERVE, &tp);
729 if (error) 735 if (error)
730 goto out; 736 goto out;
731 737
@@ -1291,6 +1297,17 @@ xfs_reflink_remap_range(
1291 1297
1292 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); 1298 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
1293 1299
1300 /*
1301 * Clear out post-eof preallocations because we don't have page cache
1302 * backing the delayed allocations and they'll never get freed on
1303 * their own.
1304 */
1305 if (xfs_can_free_eofblocks(dest, true)) {
1306 ret = xfs_free_eofblocks(dest);
1307 if (ret)
1308 goto out_unlock;
1309 }
1310
1294 /* Set flags and remap blocks. */ 1311 /* Set flags and remap blocks. */
1295 ret = xfs_reflink_set_inode_flag(src, dest); 1312 ret = xfs_reflink_set_inode_flag(src, dest);
1296 if (ret) 1313 if (ret)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 5122d3021117..1dacccc367f8 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1360,6 +1360,7 @@ xfs_fs_remount(
1360 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1360 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1361 return error; 1361 return error;
1362 } 1362 }
1363 xfs_queue_cowblocks(mp);
1363 1364
1364 /* Create the per-AG metadata reservation pool .*/ 1365 /* Create the per-AG metadata reservation pool .*/
1365 error = xfs_fs_reserve_ag_blocks(mp); 1366 error = xfs_fs_reserve_ag_blocks(mp);
@@ -1369,6 +1370,14 @@ xfs_fs_remount(
1369 1370
1370 /* rw -> ro */ 1371 /* rw -> ro */
1371 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) { 1372 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
1373 /* Get rid of any leftover CoW reservations... */
1374 cancel_delayed_work_sync(&mp->m_cowblocks_work);
1375 error = xfs_icache_free_cowblocks(mp, NULL);
1376 if (error) {
1377 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1378 return error;
1379 }
1380
1372 /* Free the per-AG metadata reservation pool. */ 1381 /* Free the per-AG metadata reservation pool. */
1373 error = xfs_fs_unreserve_ag_blocks(mp); 1382 error = xfs_fs_unreserve_ag_blocks(mp);
1374 if (error) { 1383 if (error) {
diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h
index ea189d88a3cc..8ac4e68a12f0 100644
--- a/include/asm-generic/mm_hooks.h
+++ b/include/asm-generic/mm_hooks.h
@@ -7,9 +7,10 @@
7#ifndef _ASM_GENERIC_MM_HOOKS_H 7#ifndef _ASM_GENERIC_MM_HOOKS_H
8#define _ASM_GENERIC_MM_HOOKS_H 8#define _ASM_GENERIC_MM_HOOKS_H
9 9
10static inline void arch_dup_mmap(struct mm_struct *oldmm, 10static inline int arch_dup_mmap(struct mm_struct *oldmm,
11 struct mm_struct *mm) 11 struct mm_struct *mm)
12{ 12{
13 return 0;
13} 14}
14 15
15static inline void arch_exit_mmap(struct mm_struct *mm) 16static inline void arch_exit_mmap(struct mm_struct *mm)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index b234d54f2cb6..868e68561f91 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
1025struct file; 1025struct file;
1026int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, 1026int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
1027 unsigned long size, pgprot_t *vma_prot); 1027 unsigned long size, pgprot_t *vma_prot);
1028
1029#ifndef CONFIG_X86_ESPFIX64
1030static inline void init_espfix_bsp(void) { }
1031#endif
1032
1028#endif /* !__ASSEMBLY__ */ 1033#endif /* !__ASSEMBLY__ */
1029 1034
1030#ifndef io_remap_pfn_range 1035#ifndef io_remap_pfn_range
diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h
index cceafa01f907..b67404fc4b34 100644
--- a/include/crypto/mcryptd.h
+++ b/include/crypto/mcryptd.h
@@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast(
27 27
28struct mcryptd_cpu_queue { 28struct mcryptd_cpu_queue {
29 struct crypto_queue queue; 29 struct crypto_queue queue;
30 spinlock_t q_lock;
30 struct work_struct work; 31 struct work_struct work;
31}; 32};
32 33
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 6e45608b2399..9da6ce22803f 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -62,7 +62,7 @@ struct arch_timer_cpu {
62 bool enabled; 62 bool enabled;
63}; 63};
64 64
65int kvm_timer_hyp_init(void); 65int kvm_timer_hyp_init(bool);
66int kvm_timer_enable(struct kvm_vcpu *vcpu); 66int kvm_timer_enable(struct kvm_vcpu *vcpu);
67int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); 67int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
68void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); 68void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 82f0c8fd7be8..23d29b39f71e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx);
492 492
493#define bio_set_dev(bio, bdev) \ 493#define bio_set_dev(bio, bdev) \
494do { \ 494do { \
495 if ((bio)->bi_disk != (bdev)->bd_disk) \
496 bio_clear_flag(bio, BIO_THROTTLED);\
495 (bio)->bi_disk = (bdev)->bd_disk; \ 497 (bio)->bi_disk = (bdev)->bd_disk; \
496 (bio)->bi_partno = (bdev)->bd_partno; \ 498 (bio)->bi_partno = (bdev)->bd_partno; \
497} while (0) 499} while (0)
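
The bio_set_dev() change above drops BIO_THROTTLED whenever a bio is redirected to a different disk, so throttling state accounted against the old device does not carry over to the new one. A toy standalone version of the same idea, with the types and the flag value invented for illustration:

	#include <stdio.h>

	#define BIO_THROTTLED	(1u << 0)

	struct toy_disk { int id; };
	struct toy_bio {
		struct toy_disk *disk;
		unsigned int flags;
	};

	static void bio_set_dev(struct toy_bio *bio, struct toy_disk *disk)
	{
		/* old device's throttling state no longer applies on a new disk */
		if (bio->disk != disk)
			bio->flags &= ~BIO_THROTTLED;
		bio->disk = disk;
	}

	int main(void)
	{
		struct toy_disk d0 = { 0 }, d1 = { 1 };
		struct toy_bio bio = { &d0, BIO_THROTTLED };

		bio_set_dev(&bio, &d0);
		printf("same disk:  throttled=%d\n", !!(bio.flags & BIO_THROTTLED)); /* 1 */
		bio_set_dev(&bio, &d1);
		printf("other disk: throttled=%d\n", !!(bio.flags & BIO_THROTTLED)); /* 0 */
		return 0;
	}
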
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a1e628e032da..9e7d8bd776d2 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -50,8 +50,6 @@ struct blk_issue_stat {
50struct bio { 50struct bio {
51 struct bio *bi_next; /* request queue link */ 51 struct bio *bi_next; /* request queue link */
52 struct gendisk *bi_disk; 52 struct gendisk *bi_disk;
53 u8 bi_partno;
54 blk_status_t bi_status;
55 unsigned int bi_opf; /* bottom bits req flags, 53 unsigned int bi_opf; /* bottom bits req flags,
56 * top bits REQ_OP. Use 54 * top bits REQ_OP. Use
57 * accessors. 55 * accessors.
@@ -59,8 +57,8 @@ struct bio {
59 unsigned short bi_flags; /* status, etc and bvec pool number */ 57 unsigned short bi_flags; /* status, etc and bvec pool number */
60 unsigned short bi_ioprio; 58 unsigned short bi_ioprio;
61 unsigned short bi_write_hint; 59 unsigned short bi_write_hint;
62 60 blk_status_t bi_status;
63 struct bvec_iter bi_iter; 61 u8 bi_partno;
64 62
65 /* Number of segments in this BIO after 63 /* Number of segments in this BIO after
66 * physical address coalescing is performed. 64 * physical address coalescing is performed.
@@ -74,8 +72,9 @@ struct bio {
74 unsigned int bi_seg_front_size; 72 unsigned int bi_seg_front_size;
75 unsigned int bi_seg_back_size; 73 unsigned int bi_seg_back_size;
76 74
77 atomic_t __bi_remaining; 75 struct bvec_iter bi_iter;
78 76
77 atomic_t __bi_remaining;
79 bio_end_io_t *bi_end_io; 78 bio_end_io_t *bi_end_io;
80 79
81 void *bi_private; 80 void *bi_private;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8089ca17db9a..0ce8a372d506 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t;
135struct request { 135struct request {
136 struct list_head queuelist; 136 struct list_head queuelist;
137 union { 137 union {
138 call_single_data_t csd; 138 struct __call_single_data csd;
139 u64 fifo_time; 139 u64 fifo_time;
140 }; 140 };
141 141
@@ -241,14 +241,24 @@ struct request {
241 struct request *next_rq; 241 struct request *next_rq;
242}; 242};
243 243
244static inline bool blk_op_is_scsi(unsigned int op)
245{
246 return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
247}
248
249static inline bool blk_op_is_private(unsigned int op)
250{
251 return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
252}
253
244static inline bool blk_rq_is_scsi(struct request *rq) 254static inline bool blk_rq_is_scsi(struct request *rq)
245{ 255{
246 return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT; 256 return blk_op_is_scsi(req_op(rq));
247} 257}
248 258
249static inline bool blk_rq_is_private(struct request *rq) 259static inline bool blk_rq_is_private(struct request *rq)
250{ 260{
251 return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT; 261 return blk_op_is_private(req_op(rq));
252} 262}
253 263
254static inline bool blk_rq_is_passthrough(struct request *rq) 264static inline bool blk_rq_is_passthrough(struct request *rq)
@@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
256 return blk_rq_is_scsi(rq) || blk_rq_is_private(rq); 266 return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
257} 267}
258 268
269static inline bool bio_is_passthrough(struct bio *bio)
270{
271 unsigned op = bio_op(bio);
272
273 return blk_op_is_scsi(op) || blk_op_is_private(op);
274}
275
259static inline unsigned short req_get_ioprio(struct request *req) 276static inline unsigned short req_get_ioprio(struct request *req)
260{ 277{
261 return req->ioprio; 278 return req->ioprio;
@@ -948,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
948extern void blk_rq_unprep_clone(struct request *rq); 965extern void blk_rq_unprep_clone(struct request *rq);
949extern blk_status_t blk_insert_cloned_request(struct request_queue *q, 966extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
950 struct request *rq); 967 struct request *rq);
951extern int blk_rq_append_bio(struct request *rq, struct bio *bio); 968extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
952extern void blk_delay_queue(struct request_queue *, unsigned long); 969extern void blk_delay_queue(struct request_queue *, unsigned long);
953extern void blk_queue_split(struct request_queue *, struct bio **); 970extern void blk_queue_split(struct request_queue *, struct bio **);
954extern void blk_recount_segments(struct request_queue *, struct bio *); 971extern void blk_recount_segments(struct request_queue *, struct bio *);
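
The blkdev.h hunk above factors the passthrough checks into op-level predicates so that the existing request-based helpers and the new bio_is_passthrough() can share one definition. A self-contained sketch of that factoring with a toy op enum (values and names are illustrative):

	#include <stdio.h>
	#include <stdbool.h>

	enum req_op {
		REQ_OP_READ,
		REQ_OP_WRITE,
		REQ_OP_SCSI_IN,
		REQ_OP_SCSI_OUT,
		REQ_OP_DRV_IN,
		REQ_OP_DRV_OUT,
	};

	static bool op_is_scsi(enum req_op op)
	{
		return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
	}

	static bool op_is_private(enum req_op op)
	{
		return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
	}

	/* usable for both a request's req_op() and a bio's bio_op() */
	static bool op_is_passthrough(enum req_op op)
	{
		return op_is_scsi(op) || op_is_private(op);
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       op_is_passthrough(REQ_OP_READ),		/* 0 */
		       op_is_passthrough(REQ_OP_SCSI_OUT),	/* 1 */
		       op_is_passthrough(REQ_OP_DRV_IN));	/* 1 */
		return 0;
	}
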
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c561b986bab0..1632bb13ad8a 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -15,11 +15,11 @@
15 * In practice this is far bigger than any realistic pointer offset; this limit 15 * In practice this is far bigger than any realistic pointer offset; this limit
16 * ensures that umax_value + (int)off + (int)size cannot overflow a u64. 16 * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
17 */ 17 */
18#define BPF_MAX_VAR_OFF (1ULL << 31) 18#define BPF_MAX_VAR_OFF (1 << 29)
19/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures 19/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures
20 * that converting umax_value to int cannot overflow. 20 * that converting umax_value to int cannot overflow.
21 */ 21 */
22#define BPF_MAX_VAR_SIZ INT_MAX 22#define BPF_MAX_VAR_SIZ (1 << 29)
23 23
24/* Liveness marks, used for registers and spilled-regs (in stack slots). 24/* Liveness marks, used for registers and spilled-regs (in stack slots).
25 * Read marks propagate upwards until they find a write mark; they record that 25 * Read marks propagate upwards until they find a write mark; they record that
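
The tightened limits above keep the verifier's offset and size arithmetic comfortably inside a signed 32-bit range. A quick standalone check of the worst case under the new caps; this only illustrates the bound, not the verifier itself:

	#include <stdio.h>
	#include <limits.h>

	#define BPF_MAX_VAR_OFF	(1 << 29)
	#define BPF_MAX_VAR_SIZ	(1 << 29)

	int main(void)
	{
		long worst = (long)BPF_MAX_VAR_OFF + BPF_MAX_VAR_SIZ;	/* 2^30 */

		printf("worst-case off+size = %ld\n", worst);
		printf("INT_MAX             = %d\n", INT_MAX);
		printf("fits in int: %s\n", worst <= INT_MAX ? "yes" : "no");
		return 0;
	}
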
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index cb18c6290ca8..8415bf1a9776 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -273,7 +273,8 @@ struct ipv6_pinfo {
273 * 100: prefer care-of address 273 * 100: prefer care-of address
274 */ 274 */
275 dontfrag:1, 275 dontfrag:1,
276 autoflowlabel:1; 276 autoflowlabel:1,
277 autoflowlabel_set:1;
277 __u8 min_hopcount; 278 __u8 min_hopcount;
278 __u8 tclass; 279 __u8 tclass;
279 __be32 rcv_flowinfo; 280 __be32 rcv_flowinfo;
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index a2a1318a3d0c..c3d3f04d8cc6 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -915,10 +915,10 @@ enum PDEV_STAT {PDEV_STAT_IDLE, PDEV_STAT_RUN};
915#define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6) 915#define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6)
916 916
917enum dev_aspm_mode { 917enum dev_aspm_mode {
918 DEV_ASPM_DISABLE = 0,
919 DEV_ASPM_DYNAMIC, 918 DEV_ASPM_DYNAMIC,
920 DEV_ASPM_BACKDOOR, 919 DEV_ASPM_BACKDOOR,
921 DEV_ASPM_STATIC, 920 DEV_ASPM_STATIC,
921 DEV_ASPM_DISABLE,
922}; 922};
923 923
924/* 924/*
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a886b51511ab..57b109c6e422 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -556,6 +556,7 @@ struct mlx5_core_sriov {
556}; 556};
557 557
558struct mlx5_irq_info { 558struct mlx5_irq_info {
559 cpumask_var_t mask;
559 char name[MLX5_MAX_IRQ_NAME]; 560 char name[MLX5_MAX_IRQ_NAME];
560}; 561};
561 562
@@ -1048,7 +1049,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
1048 enum mlx5_eq_type type); 1049 enum mlx5_eq_type type);
1049int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); 1050int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
1050int mlx5_start_eqs(struct mlx5_core_dev *dev); 1051int mlx5_start_eqs(struct mlx5_core_dev *dev);
1051int mlx5_stop_eqs(struct mlx5_core_dev *dev); 1052void mlx5_stop_eqs(struct mlx5_core_dev *dev);
1052int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, 1053int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
1053 unsigned int *irqn); 1054 unsigned int *irqn);
1054int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); 1055int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 38a7577a9ce7..d44ec5f41d4a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -147,7 +147,7 @@ enum {
147 MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, 147 MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771,
148 MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, 148 MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772,
149 MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, 149 MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773,
150 MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, 150 MLX5_CMD_OP_SET_PP_RATE_LIMIT = 0x780,
151 MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, 151 MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781,
152 MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782, 152 MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782,
153 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783, 153 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783,
@@ -7239,7 +7239,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
7239 u8 vxlan_udp_port[0x10]; 7239 u8 vxlan_udp_port[0x10];
7240}; 7240};
7241 7241
7242struct mlx5_ifc_set_rate_limit_out_bits { 7242struct mlx5_ifc_set_pp_rate_limit_out_bits {
7243 u8 status[0x8]; 7243 u8 status[0x8];
7244 u8 reserved_at_8[0x18]; 7244 u8 reserved_at_8[0x18];
7245 7245
@@ -7248,7 +7248,7 @@ struct mlx5_ifc_set_rate_limit_out_bits {
7248 u8 reserved_at_40[0x40]; 7248 u8 reserved_at_40[0x40];
7249}; 7249};
7250 7250
7251struct mlx5_ifc_set_rate_limit_in_bits { 7251struct mlx5_ifc_set_pp_rate_limit_in_bits {
7252 u8 opcode[0x10]; 7252 u8 opcode[0x10];
7253 u8 reserved_at_10[0x10]; 7253 u8 reserved_at_10[0x10];
7254 7254
@@ -7261,6 +7261,8 @@ struct mlx5_ifc_set_rate_limit_in_bits {
7261 u8 reserved_at_60[0x20]; 7261 u8 reserved_at_60[0x20];
7262 7262
7263 u8 rate_limit[0x20]; 7263 u8 rate_limit[0x20];
7264
7265 u8 reserved_at_a0[0x160];
7264}; 7266};
7265 7267
7266struct mlx5_ifc_access_register_out_bits { 7268struct mlx5_ifc_access_register_out_bits {
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 7b2170bfd6e7..bc6bb325d1bf 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -126,7 +126,7 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
126 * for that name. This appears in the sysfs "modalias" attribute 126 * for that name. This appears in the sysfs "modalias" attribute
127 * for driver coldplugging, and in uevents used for hotplugging 127 * for driver coldplugging, and in uevents used for hotplugging
128 * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when 128 * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when
129 * when not using a GPIO line) 129 * not using a GPIO line)
130 * 130 *
131 * @statistics: statistics for the spi_device 131 * @statistics: statistics for the spi_device
132 * 132 *
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8b8118a7fadb..cb4d92b79cd9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3226,7 +3226,6 @@ struct cfg80211_ops {
3226 * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. 3226 * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN.
3227 * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing 3227 * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing
3228 * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH. 3228 * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH.
3229 * @WIPHY_FLAG_SUPPORTS_SCHED_SCAN: The device supports scheduled scans.
3230 * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the 3229 * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the
3231 * firmware. 3230 * firmware.
3232 * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP. 3231 * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP.
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 0105445cab83..8e08b6da72f3 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -694,9 +694,7 @@ struct tc_cls_matchall_offload {
694}; 694};
695 695
696enum tc_clsbpf_command { 696enum tc_clsbpf_command {
697 TC_CLSBPF_ADD, 697 TC_CLSBPF_OFFLOAD,
698 TC_CLSBPF_REPLACE,
699 TC_CLSBPF_DESTROY,
700 TC_CLSBPF_STATS, 698 TC_CLSBPF_STATS,
701}; 699};
702 700
@@ -705,6 +703,7 @@ struct tc_cls_bpf_offload {
705 enum tc_clsbpf_command command; 703 enum tc_clsbpf_command command;
706 struct tcf_exts *exts; 704 struct tcf_exts *exts;
707 struct bpf_prog *prog; 705 struct bpf_prog *prog;
706 struct bpf_prog *oldprog;
708 const char *name; 707 const char *name;
709 bool exts_integrated; 708 bool exts_integrated;
710 u32 gen_flags; 709 u32 gen_flags;
diff --git a/include/trace/events/clk.h b/include/trace/events/clk.h
index 758607226bfd..2cd449328aee 100644
--- a/include/trace/events/clk.h
+++ b/include/trace/events/clk.h
@@ -134,12 +134,12 @@ DECLARE_EVENT_CLASS(clk_parent,
134 134
135 TP_STRUCT__entry( 135 TP_STRUCT__entry(
136 __string( name, core->name ) 136 __string( name, core->name )
137 __string( pname, parent->name ) 137 __string( pname, parent ? parent->name : "none" )
138 ), 138 ),
139 139
140 TP_fast_assign( 140 TP_fast_assign(
141 __assign_str(name, core->name); 141 __assign_str(name, core->name);
142 __assign_str(pname, parent->name); 142 __assign_str(pname, parent ? parent->name : "none");
143 ), 143 ),
144 144
145 TP_printk("%s %s", __get_str(name), __get_str(pname)) 145 TP_printk("%s %s", __get_str(name), __get_str(pname))
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index e4b0b8e09932..2c735a3e6613 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -211,7 +211,7 @@ TRACE_EVENT(kvm_ack_irq,
211 { KVM_TRACE_MMIO_WRITE, "write" } 211 { KVM_TRACE_MMIO_WRITE, "write" }
212 212
213TRACE_EVENT(kvm_mmio, 213TRACE_EVENT(kvm_mmio,
214 TP_PROTO(int type, int len, u64 gpa, u64 val), 214 TP_PROTO(int type, int len, u64 gpa, void *val),
215 TP_ARGS(type, len, gpa, val), 215 TP_ARGS(type, len, gpa, val),
216 216
217 TP_STRUCT__entry( 217 TP_STRUCT__entry(
@@ -225,7 +225,10 @@ TRACE_EVENT(kvm_mmio,
225 __entry->type = type; 225 __entry->type = type;
226 __entry->len = len; 226 __entry->len = len;
227 __entry->gpa = gpa; 227 __entry->gpa = gpa;
228 __entry->val = val; 228 __entry->val = 0;
229 if (val)
230 memcpy(&__entry->val, val,
231 min_t(u32, sizeof(__entry->val), len));
229 ), 232 ),
230 233
231 TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", 234 TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
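
The kvm_mmio tracepoint now receives the raw MMIO buffer and copies at most sizeof(__entry->val) bytes out of it, instead of being handed a pre-widened u64, so short or not-yet-available (NULL) buffers can no longer be over-read. A minimal standalone sketch of that bounded-copy pattern (plain C; min_t is replaced by a local helper because this is not kernel code):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Userspace stand-in for the kernel's min_t(u32, a, b). */
#define min_u32(a, b) ((uint32_t)(a) < (uint32_t)(b) ? (uint32_t)(a) : (uint32_t)(b))

static uint64_t record_mmio_val(const void *val, uint32_t len)
{
        uint64_t out = 0;

        /* Copy only what fits in the trace field, and only if a buffer exists. */
        if (val)
                memcpy(&out, val, min_u32(sizeof(out), len));
        return out;
}

int main(void)
{
        uint8_t two_bytes[2] = { 0x34, 0x12 };

        /* 2-byte MMIO write: only two bytes are read, the rest of 'out' stays zero. */
        printf("val=0x%llx\n", (unsigned long long)record_mmio_val(two_bytes, 2));
        /* NULL buffer (e.g. a read whose data is not available yet) records 0. */
        printf("val=0x%llx\n", (unsigned long long)record_mmio_val(NULL, 8));
        return 0;
}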
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index 4914b93a23f2..61f410fd74e4 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -44,3 +44,8 @@ static inline void xen_balloon_init(void)
44{ 44{
45} 45}
46#endif 46#endif
47
48#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
49struct resource;
50void arch_xen_balloon_init(struct resource *hostmem_resource);
51#endif
diff --git a/init/main.c b/init/main.c
index e96e3a14533c..7b606fc48482 100644
--- a/init/main.c
+++ b/init/main.c
@@ -504,6 +504,8 @@ static void __init mm_init(void)
504 pgtable_init(); 504 pgtable_init();
505 vmalloc_init(); 505 vmalloc_init();
506 ioremap_huge_init(); 506 ioremap_huge_init();
507 /* Should be run before the first non-init thread is created */
508 init_espfix_bsp();
507} 509}
508 510
509asmlinkage __visible void __init start_kernel(void) 511asmlinkage __visible void __init start_kernel(void)
@@ -679,10 +681,6 @@ asmlinkage __visible void __init start_kernel(void)
679 if (efi_enabled(EFI_RUNTIME_SERVICES)) 681 if (efi_enabled(EFI_RUNTIME_SERVICES))
680 efi_enter_virtual_mode(); 682 efi_enter_virtual_mode();
681#endif 683#endif
682#ifdef CONFIG_X86_ESPFIX64
683 /* Should be run before the first non-init thread is created */
684 init_espfix_bsp();
685#endif
686 thread_stack_cache_init(); 684 thread_stack_cache_init();
687 cred_init(); 685 cred_init();
688 fork_init(); 686 fork_init();
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d4593571c404..04b24876cd23 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1059,6 +1059,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
1059 break; 1059 break;
1060 case PTR_TO_STACK: 1060 case PTR_TO_STACK:
1061 pointer_desc = "stack "; 1061 pointer_desc = "stack ";
1062 /* The stack spill tracking logic in check_stack_write()
1063 * and check_stack_read() relies on stack accesses being
1064 * aligned.
1065 */
1066 strict = true;
1062 break; 1067 break;
1063 default: 1068 default:
1064 break; 1069 break;
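
Stack pointer accesses are now always checked strictly for alignment because the spill/fill tracking in check_stack_write()/check_stack_read() reasons in whole BPF_REG_SIZE (8-byte) slots. For illustration only, a hypothetical program fragment built with the insn macros from include/linux/filter.h that this hunk causes to be rejected even on architectures with efficient unaligned access, since the 64-bit spill at fp - 12 straddles two slots:

#include <linux/filter.h>

/* Hypothetical fragment, not part of this patch: a 64-bit spill whose target,
 * fp - 12, straddles the two 8-byte stack slots [-16, -9] and [-8, -1]. With
 * the strict check above the verifier flags this store as misaligned, because
 * spill/fill tracking only understands whole slots.
 */
static const struct bpf_insn misaligned_spill[] = {
        BPF_MOV64_IMM(BPF_REG_1, 0),
        BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -12),        /* *(u64 *)(fp - 12) = r1 */
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
};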
@@ -1067,6 +1072,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
1067 strict); 1072 strict);
1068} 1073}
1069 1074
1075/* truncate register to smaller size (in bytes)
1076 * must be called with size < BPF_REG_SIZE
1077 */
1078static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1079{
1080 u64 mask;
1081
1082 /* clear high bits in bit representation */
1083 reg->var_off = tnum_cast(reg->var_off, size);
1084
1085 /* fix arithmetic bounds */
1086 mask = ((u64)1 << (size * 8)) - 1;
1087 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1088 reg->umin_value &= mask;
1089 reg->umax_value &= mask;
1090 } else {
1091 reg->umin_value = 0;
1092 reg->umax_value = mask;
1093 }
1094 reg->smin_value = reg->umin_value;
1095 reg->smax_value = reg->umax_value;
1096}
1097
1070/* check whether memory at (regno + off) is accessible for t = (read | write) 1098/* check whether memory at (regno + off) is accessible for t = (read | write)
1071 * if t==write, value_regno is a register which value is stored into memory 1099 * if t==write, value_regno is a register which value is stored into memory
1072 * if t==read, value_regno is a register which will receive the value from memory 1100 * if t==read, value_regno is a register which will receive the value from memory
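
coerce_reg_to_size() truncates both the tnum and the arithmetic bounds to the new width: the umin/umax pair can only be kept when the bits being discarded are identical in both, otherwise the truncated value may wrap and the only safe range is [0, mask]. A standalone sketch of the bounds half, with a worked 4-byte example (assumed helper names, not kernel code):

#include <stdio.h>
#include <stdint.h>

/* Mirror of the bounds fixup in coerce_reg_to_size() for size < 8. */
static void coerce_bounds(uint64_t *umin, uint64_t *umax, int size)
{
        uint64_t mask = ((uint64_t)1 << (size * 8)) - 1;

        if ((*umin & ~mask) == (*umax & ~mask)) {
                /* High bits identical: truncation cannot reorder the range, keep it. */
                *umin &= mask;
                *umax &= mask;
        } else {
                /* Truncation can wrap: fall back to the full [0, mask] range. */
                *umin = 0;
                *umax = mask;
        }
}

int main(void)
{
        uint64_t umin, umax;

        /* Same high bits: [0x1_0000_0005, 0x1_0000_0009] becomes [5, 9]. */
        umin = 0x100000005ULL; umax = 0x100000009ULL;
        coerce_bounds(&umin, &umax, 4);
        printf("[%llu, %llu]\n", (unsigned long long)umin, (unsigned long long)umax);

        /* Different high bits: [0xffff_fffe, 0x1_0000_0002] wraps, so [0, 0xffffffff]. */
        umin = 0xfffffffeULL; umax = 0x100000002ULL;
        coerce_bounds(&umin, &umax, 4);
        printf("[0x%llx, 0x%llx]\n", (unsigned long long)umin, (unsigned long long)umax);
        return 0;
}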
@@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
1200 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && 1228 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
1201 regs[value_regno].type == SCALAR_VALUE) { 1229 regs[value_regno].type == SCALAR_VALUE) {
1202 /* b/h/w load zero-extends, mark upper bits as known 0 */ 1230 /* b/h/w load zero-extends, mark upper bits as known 0 */
1203 regs[value_regno].var_off = 1231 coerce_reg_to_size(&regs[value_regno], size);
1204 tnum_cast(regs[value_regno].var_off, size);
1205 __update_reg_bounds(&regs[value_regno]);
1206 } 1232 }
1207 return err; 1233 return err;
1208} 1234}
@@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1282 tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off); 1308 tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
1283 verbose(env, "invalid variable stack read R%d var_off=%s\n", 1309 verbose(env, "invalid variable stack read R%d var_off=%s\n",
1284 regno, tn_buf); 1310 regno, tn_buf);
1311 return -EACCES;
1285 } 1312 }
1286 off = regs[regno].off + regs[regno].var_off.value; 1313 off = regs[regno].off + regs[regno].var_off.value;
1287 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || 1314 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
@@ -1674,7 +1701,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
1674 return -EINVAL; 1701 return -EINVAL;
1675 } 1702 }
1676 1703
1704 /* With LD_ABS/IND some JITs save/restore skb from r1. */
1677 changes_data = bpf_helper_changes_pkt_data(fn->func); 1705 changes_data = bpf_helper_changes_pkt_data(fn->func);
1706 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
1707 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
1708 func_id_name(func_id), func_id);
1709 return -EINVAL;
1710 }
1678 1711
1679 memset(&meta, 0, sizeof(meta)); 1712 memset(&meta, 0, sizeof(meta));
1680 meta.pkt_access = fn->pkt_access; 1713 meta.pkt_access = fn->pkt_access;
@@ -1766,14 +1799,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
1766 return 0; 1799 return 0;
1767} 1800}
1768 1801
1769static void coerce_reg_to_32(struct bpf_reg_state *reg)
1770{
1771 /* clear high 32 bits */
1772 reg->var_off = tnum_cast(reg->var_off, 4);
1773 /* Update bounds */
1774 __update_reg_bounds(reg);
1775}
1776
1777static bool signed_add_overflows(s64 a, s64 b) 1802static bool signed_add_overflows(s64 a, s64 b)
1778{ 1803{
1779 /* Do the add in u64, where overflow is well-defined */ 1804 /* Do the add in u64, where overflow is well-defined */
@@ -1794,6 +1819,41 @@ static bool signed_sub_overflows(s64 a, s64 b)
1794 return res > a; 1819 return res > a;
1795} 1820}
1796 1821
1822static bool check_reg_sane_offset(struct bpf_verifier_env *env,
1823 const struct bpf_reg_state *reg,
1824 enum bpf_reg_type type)
1825{
1826 bool known = tnum_is_const(reg->var_off);
1827 s64 val = reg->var_off.value;
1828 s64 smin = reg->smin_value;
1829
1830 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
1831 verbose(env, "math between %s pointer and %lld is not allowed\n",
1832 reg_type_str[type], val);
1833 return false;
1834 }
1835
1836 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
1837 verbose(env, "%s pointer offset %d is not allowed\n",
1838 reg_type_str[type], reg->off);
1839 return false;
1840 }
1841
1842 if (smin == S64_MIN) {
1843 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
1844 reg_type_str[type]);
1845 return false;
1846 }
1847
1848 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
1849 verbose(env, "value %lld makes %s pointer be out of bounds\n",
1850 smin, reg_type_str[type]);
1851 return false;
1852 }
1853
1854 return true;
1855}
1856
1797/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. 1857/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
1798 * Caller should also handle BPF_MOV case separately. 1858 * Caller should also handle BPF_MOV case separately.
1799 * If we return -EACCES, caller may want to try again treating pointer as a 1859 * If we return -EACCES, caller may want to try again treating pointer as a
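
check_reg_sane_offset() rejects pointer offsets, fixed or variable, that get anywhere near the verifier's BPF_MAX_VAR_OFF limit, so the later min/max arithmetic cannot overflow 64 bits. Roughly the predicate it enforces, sketched standalone (BPF_MAX_VAR_OFF assumed here to be 1 << 29):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BPF_MAX_VAR_OFF (1 << 29)        /* assumed value of the verifier limit */

/* Sketch of the range test applied to a pointer's fixed offset and to the
 * minimum of its variable offset before pointer arithmetic is permitted.
 */
static bool offset_is_sane(int64_t fixed_off, int64_t var_smin)
{
        if (fixed_off >= BPF_MAX_VAR_OFF || fixed_off <= -BPF_MAX_VAR_OFF)
                return false;
        if (var_smin == INT64_MIN)        /* completely unbounded below */
                return false;
        if (var_smin >= BPF_MAX_VAR_OFF || var_smin <= -BPF_MAX_VAR_OFF)
                return false;
        return true;
}

int main(void)
{
        printf("%d\n", offset_is_sane(4096, -64));        /* 1: well inside the limit */
        printf("%d\n", offset_is_sane(1LL << 30, 0));      /* 0: fixed offset too large */
        printf("%d\n", offset_is_sane(0, INT64_MIN));      /* 0: unbounded minimum */
        return 0;
}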
@@ -1830,29 +1890,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1830 1890
1831 if (BPF_CLASS(insn->code) != BPF_ALU64) { 1891 if (BPF_CLASS(insn->code) != BPF_ALU64) {
1832 /* 32-bit ALU ops on pointers produce (meaningless) scalars */ 1892 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
1833 if (!env->allow_ptr_leaks) 1893 verbose(env,
1834 verbose(env, 1894 "R%d 32-bit pointer arithmetic prohibited\n",
1835 "R%d 32-bit pointer arithmetic prohibited\n", 1895 dst);
1836 dst);
1837 return -EACCES; 1896 return -EACCES;
1838 } 1897 }
1839 1898
1840 if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { 1899 if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
1841 if (!env->allow_ptr_leaks) 1900 verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
1842 verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", 1901 dst);
1843 dst);
1844 return -EACCES; 1902 return -EACCES;
1845 } 1903 }
1846 if (ptr_reg->type == CONST_PTR_TO_MAP) { 1904 if (ptr_reg->type == CONST_PTR_TO_MAP) {
1847 if (!env->allow_ptr_leaks) 1905 verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
1848 verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", 1906 dst);
1849 dst);
1850 return -EACCES; 1907 return -EACCES;
1851 } 1908 }
1852 if (ptr_reg->type == PTR_TO_PACKET_END) { 1909 if (ptr_reg->type == PTR_TO_PACKET_END) {
1853 if (!env->allow_ptr_leaks) 1910 verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
1854 verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", 1911 dst);
1855 dst);
1856 return -EACCES; 1912 return -EACCES;
1857 } 1913 }
1858 1914
@@ -1862,6 +1918,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1862 dst_reg->type = ptr_reg->type; 1918 dst_reg->type = ptr_reg->type;
1863 dst_reg->id = ptr_reg->id; 1919 dst_reg->id = ptr_reg->id;
1864 1920
1921 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
1922 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
1923 return -EINVAL;
1924
1865 switch (opcode) { 1925 switch (opcode) {
1866 case BPF_ADD: 1926 case BPF_ADD:
1867 /* We can take a fixed offset as long as it doesn't overflow 1927 /* We can take a fixed offset as long as it doesn't overflow
@@ -1915,9 +1975,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1915 case BPF_SUB: 1975 case BPF_SUB:
1916 if (dst_reg == off_reg) { 1976 if (dst_reg == off_reg) {
1917 /* scalar -= pointer. Creates an unknown scalar */ 1977 /* scalar -= pointer. Creates an unknown scalar */
1918 if (!env->allow_ptr_leaks) 1978 verbose(env, "R%d tried to subtract pointer from scalar\n",
1919 verbose(env, "R%d tried to subtract pointer from scalar\n", 1979 dst);
1920 dst);
1921 return -EACCES; 1980 return -EACCES;
1922 } 1981 }
1923 /* We don't allow subtraction from FP, because (according to 1982 /* We don't allow subtraction from FP, because (according to
@@ -1925,9 +1984,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1925 * be able to deal with it. 1984 * be able to deal with it.
1926 */ 1985 */
1927 if (ptr_reg->type == PTR_TO_STACK) { 1986 if (ptr_reg->type == PTR_TO_STACK) {
1928 if (!env->allow_ptr_leaks) 1987 verbose(env, "R%d subtraction from stack pointer prohibited\n",
1929 verbose(env, "R%d subtraction from stack pointer prohibited\n", 1988 dst);
1930 dst);
1931 return -EACCES; 1989 return -EACCES;
1932 } 1990 }
1933 if (known && (ptr_reg->off - smin_val == 1991 if (known && (ptr_reg->off - smin_val ==
@@ -1976,28 +2034,30 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1976 case BPF_AND: 2034 case BPF_AND:
1977 case BPF_OR: 2035 case BPF_OR:
1978 case BPF_XOR: 2036 case BPF_XOR:
1979 /* bitwise ops on pointers are troublesome, prohibit for now. 2037 /* bitwise ops on pointers are troublesome, prohibit. */
1980 * (However, in principle we could allow some cases, e.g. 2038 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
1981 * ptr &= ~3 which would reduce min_value by 3.) 2039 dst, bpf_alu_string[opcode >> 4]);
1982 */
1983 if (!env->allow_ptr_leaks)
1984 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
1985 dst, bpf_alu_string[opcode >> 4]);
1986 return -EACCES; 2040 return -EACCES;
1987 default: 2041 default:
1988 /* other operators (e.g. MUL,LSH) produce non-pointer results */ 2042 /* other operators (e.g. MUL,LSH) produce non-pointer results */
1989 if (!env->allow_ptr_leaks) 2043 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
1990 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", 2044 dst, bpf_alu_string[opcode >> 4]);
1991 dst, bpf_alu_string[opcode >> 4]);
1992 return -EACCES; 2045 return -EACCES;
1993 } 2046 }
1994 2047
2048 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
2049 return -EINVAL;
2050
1995 __update_reg_bounds(dst_reg); 2051 __update_reg_bounds(dst_reg);
1996 __reg_deduce_bounds(dst_reg); 2052 __reg_deduce_bounds(dst_reg);
1997 __reg_bound_offset(dst_reg); 2053 __reg_bound_offset(dst_reg);
1998 return 0; 2054 return 0;
1999} 2055}
2000 2056
2057/* WARNING: This function does calculations on 64-bit values, but the actual
2058 * execution may occur on 32-bit values. Therefore, things like bitshifts
2059 * need extra checks in the 32-bit case.
2060 */
2001static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, 2061static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2002 struct bpf_insn *insn, 2062 struct bpf_insn *insn,
2003 struct bpf_reg_state *dst_reg, 2063 struct bpf_reg_state *dst_reg,
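
The warning added above exists because bounds are tracked with 64-bit arithmetic even for 32-bit (BPF_ALU) instructions, so any operation whose result differs between the two widths needs an explicit width check; the BPF_LSH/BPF_RSH hunks below add exactly that via insn_bitness. A tiny plain-C illustration of the discrepancy:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* Bounds computed on the 64-bit model: 1 << 33 is perfectly defined. */
        uint64_t model = (uint64_t)1 << 33;

        /* The 32-bit datapath cannot represent that result at all, and a
         * 32-bit shift by 33 is undefined (many CPUs mask the count to 5
         * bits), so shifts must be range-checked against the instruction
         * width rather than against a fixed 63.
         */
        printf("64-bit model result: 0x%llx\n", (unsigned long long)model);
        printf("32-bit truncation of it: 0x%x\n", (unsigned int)(uint32_t)model);
        return 0;
}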
@@ -2008,12 +2068,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2008 bool src_known, dst_known; 2068 bool src_known, dst_known;
2009 s64 smin_val, smax_val; 2069 s64 smin_val, smax_val;
2010 u64 umin_val, umax_val; 2070 u64 umin_val, umax_val;
2071 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
2011 2072
2012 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2013 /* 32-bit ALU ops are (32,32)->64 */
2014 coerce_reg_to_32(dst_reg);
2015 coerce_reg_to_32(&src_reg);
2016 }
2017 smin_val = src_reg.smin_value; 2073 smin_val = src_reg.smin_value;
2018 smax_val = src_reg.smax_value; 2074 smax_val = src_reg.smax_value;
2019 umin_val = src_reg.umin_value; 2075 umin_val = src_reg.umin_value;
@@ -2021,6 +2077,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2021 src_known = tnum_is_const(src_reg.var_off); 2077 src_known = tnum_is_const(src_reg.var_off);
2022 dst_known = tnum_is_const(dst_reg->var_off); 2078 dst_known = tnum_is_const(dst_reg->var_off);
2023 2079
2080 if (!src_known &&
2081 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
2082 __mark_reg_unknown(dst_reg);
2083 return 0;
2084 }
2085
2024 switch (opcode) { 2086 switch (opcode) {
2025 case BPF_ADD: 2087 case BPF_ADD:
2026 if (signed_add_overflows(dst_reg->smin_value, smin_val) || 2088 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
@@ -2149,9 +2211,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2149 __update_reg_bounds(dst_reg); 2211 __update_reg_bounds(dst_reg);
2150 break; 2212 break;
2151 case BPF_LSH: 2213 case BPF_LSH:
2152 if (umax_val > 63) { 2214 if (umax_val >= insn_bitness) {
2153 /* Shifts greater than 63 are undefined. This includes 2215 /* Shifts greater than 31 or 63 are undefined.
2154 * shifts by a negative number. 2216 * This includes shifts by a negative number.
2155 */ 2217 */
2156 mark_reg_unknown(env, regs, insn->dst_reg); 2218 mark_reg_unknown(env, regs, insn->dst_reg);
2157 break; 2219 break;
@@ -2177,27 +2239,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2177 __update_reg_bounds(dst_reg); 2239 __update_reg_bounds(dst_reg);
2178 break; 2240 break;
2179 case BPF_RSH: 2241 case BPF_RSH:
2180 if (umax_val > 63) { 2242 if (umax_val >= insn_bitness) {
2181 /* Shifts greater than 63 are undefined. This includes 2243 /* Shifts greater than 31 or 63 are undefined.
2182 * shifts by a negative number. 2244 * This includes shifts by a negative number.
2183 */ 2245 */
2184 mark_reg_unknown(env, regs, insn->dst_reg); 2246 mark_reg_unknown(env, regs, insn->dst_reg);
2185 break; 2247 break;
2186 } 2248 }
2187 /* BPF_RSH is an unsigned shift, so make the appropriate casts */ 2249 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
2188 if (dst_reg->smin_value < 0) { 2250 * be negative, then either:
2189 if (umin_val) { 2251 * 1) src_reg might be zero, so the sign bit of the result is
2190 /* Sign bit will be cleared */ 2252 * unknown, so we lose our signed bounds
2191 dst_reg->smin_value = 0; 2253 * 2) it's known negative, thus the unsigned bounds capture the
2192 } else { 2254 * signed bounds
2193 /* Lost sign bit information */ 2255 * 3) the signed bounds cross zero, so they tell us nothing
2194 dst_reg->smin_value = S64_MIN; 2256 * about the result
2195 dst_reg->smax_value = S64_MAX; 2257 * If the value in dst_reg is known nonnegative, then again the
 2196 			} 2258	 * unsigned bounds capture the signed bounds.
2197 } else { 2259 * Thus, in all cases it suffices to blow away our signed bounds
2198 dst_reg->smin_value = 2260 * and rely on inferring new ones from the unsigned bounds and
2199 (u64)(dst_reg->smin_value) >> umax_val; 2261 * var_off of the result.
2200 } 2262 */
2263 dst_reg->smin_value = S64_MIN;
2264 dst_reg->smax_value = S64_MAX;
2201 if (src_known) 2265 if (src_known)
2202 dst_reg->var_off = tnum_rshift(dst_reg->var_off, 2266 dst_reg->var_off = tnum_rshift(dst_reg->var_off,
2203 umin_val); 2267 umin_val);
@@ -2213,6 +2277,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2213 break; 2277 break;
2214 } 2278 }
2215 2279
2280 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2281 /* 32-bit ALU ops are (32,32)->32 */
2282 coerce_reg_to_size(dst_reg, 4);
2283 coerce_reg_to_size(&src_reg, 4);
2284 }
2285
2216 __reg_deduce_bounds(dst_reg); 2286 __reg_deduce_bounds(dst_reg);
2217 __reg_bound_offset(dst_reg); 2287 __reg_bound_offset(dst_reg);
2218 return 0; 2288 return 0;
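
The rewritten BPF_RSH comment boils down to this: a logical right shift of a possibly negative value yields no usable signed bounds, because the set sign bit turns into a huge positive contribution, so the verifier discards smin/smax and lets __reg_deduce_bounds() recover them from the unsigned bounds and var_off. A quick numeric check in plain C:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        int64_t smin = -4, smax = 4;        /* old signed bounds of dst_reg */

        /* BPF_RSH is a logical shift, so -4 does not stay "small": */
        uint64_t lo = (uint64_t)smin >> 1;        /* 0x7ffffffffffffffe */
        uint64_t hi = (uint64_t)smax >> 1;        /* 2 */

        /* The shifted "minimum" now lies far above the shifted "maximum",
         * so new signed bounds cannot be derived from the old ones; they
         * are blown away and re-derived from umin/umax and var_off instead.
         */
        printf("(u64)%lld >> 1 = 0x%llx\n", (long long)smin, (unsigned long long)lo);
        printf("(u64)%lld >> 1 = 0x%llx\n", (long long)smax, (unsigned long long)hi);
        return 0;
}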
@@ -2227,7 +2297,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2227 struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg; 2297 struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
2228 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; 2298 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
2229 u8 opcode = BPF_OP(insn->code); 2299 u8 opcode = BPF_OP(insn->code);
2230 int rc;
2231 2300
2232 dst_reg = &regs[insn->dst_reg]; 2301 dst_reg = &regs[insn->dst_reg];
2233 src_reg = NULL; 2302 src_reg = NULL;
@@ -2238,43 +2307,29 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2238 if (src_reg->type != SCALAR_VALUE) { 2307 if (src_reg->type != SCALAR_VALUE) {
2239 if (dst_reg->type != SCALAR_VALUE) { 2308 if (dst_reg->type != SCALAR_VALUE) {
2240 /* Combining two pointers by any ALU op yields 2309 /* Combining two pointers by any ALU op yields
2241 * an arbitrary scalar. 2310 * an arbitrary scalar. Disallow all math except
2311 * pointer subtraction
2242 */ 2312 */
 2243 			if (!env->allow_ptr_leaks) { 2313 				if (opcode == BPF_SUB) {
2244 verbose(env, "R%d pointer %s pointer prohibited\n", 2314 mark_reg_unknown(env, regs, insn->dst_reg);
2245 insn->dst_reg, 2315 return 0;
2246 bpf_alu_string[opcode >> 4]);
2247 return -EACCES;
2248 } 2316 }
2249 mark_reg_unknown(env, regs, insn->dst_reg); 2317 verbose(env, "R%d pointer %s pointer prohibited\n",
2250 return 0; 2318 insn->dst_reg,
2319 bpf_alu_string[opcode >> 4]);
2320 return -EACCES;
2251 } else { 2321 } else {
2252 /* scalar += pointer 2322 /* scalar += pointer
2253 * This is legal, but we have to reverse our 2323 * This is legal, but we have to reverse our
2254 * src/dest handling in computing the range 2324 * src/dest handling in computing the range
2255 */ 2325 */
2256 rc = adjust_ptr_min_max_vals(env, insn, 2326 return adjust_ptr_min_max_vals(env, insn,
2257 src_reg, dst_reg); 2327 src_reg, dst_reg);
2258 if (rc == -EACCES && env->allow_ptr_leaks) {
2259 /* scalar += unknown scalar */
2260 __mark_reg_unknown(&off_reg);
2261 return adjust_scalar_min_max_vals(
2262 env, insn,
2263 dst_reg, off_reg);
2264 }
2265 return rc;
2266 } 2328 }
2267 } else if (ptr_reg) { 2329 } else if (ptr_reg) {
2268 /* pointer += scalar */ 2330 /* pointer += scalar */
2269 rc = adjust_ptr_min_max_vals(env, insn, 2331 return adjust_ptr_min_max_vals(env, insn,
2270 dst_reg, src_reg); 2332 dst_reg, src_reg);
2271 if (rc == -EACCES && env->allow_ptr_leaks) {
2272 /* unknown scalar += scalar */
2273 __mark_reg_unknown(dst_reg);
2274 return adjust_scalar_min_max_vals(
2275 env, insn, dst_reg, *src_reg);
2276 }
2277 return rc;
2278 } 2333 }
2279 } else { 2334 } else {
2280 /* Pretend the src is a reg with a known value, since we only 2335 /* Pretend the src is a reg with a known value, since we only
@@ -2283,17 +2338,9 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2283 off_reg.type = SCALAR_VALUE; 2338 off_reg.type = SCALAR_VALUE;
2284 __mark_reg_known(&off_reg, insn->imm); 2339 __mark_reg_known(&off_reg, insn->imm);
2285 src_reg = &off_reg; 2340 src_reg = &off_reg;
2286 if (ptr_reg) { /* pointer += K */ 2341 if (ptr_reg) /* pointer += K */
2287 rc = adjust_ptr_min_max_vals(env, insn, 2342 return adjust_ptr_min_max_vals(env, insn,
2288 ptr_reg, src_reg); 2343 ptr_reg, src_reg);
2289 if (rc == -EACCES && env->allow_ptr_leaks) {
2290 /* unknown scalar += K */
2291 __mark_reg_unknown(dst_reg);
2292 return adjust_scalar_min_max_vals(
2293 env, insn, dst_reg, off_reg);
2294 }
2295 return rc;
2296 }
2297 } 2344 }
2298 2345
2299 /* Got here implies adding two SCALAR_VALUEs */ 2346 /* Got here implies adding two SCALAR_VALUEs */
@@ -2390,17 +2437,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
2390 return -EACCES; 2437 return -EACCES;
2391 } 2438 }
2392 mark_reg_unknown(env, regs, insn->dst_reg); 2439 mark_reg_unknown(env, regs, insn->dst_reg);
2393 /* high 32 bits are known zero. */ 2440 coerce_reg_to_size(&regs[insn->dst_reg], 4);
2394 regs[insn->dst_reg].var_off = tnum_cast(
2395 regs[insn->dst_reg].var_off, 4);
2396 __update_reg_bounds(&regs[insn->dst_reg]);
2397 } 2441 }
2398 } else { 2442 } else {
2399 /* case: R = imm 2443 /* case: R = imm
2400 * remember the value we stored into this reg 2444 * remember the value we stored into this reg
2401 */ 2445 */
2402 regs[insn->dst_reg].type = SCALAR_VALUE; 2446 regs[insn->dst_reg].type = SCALAR_VALUE;
2403 __mark_reg_known(regs + insn->dst_reg, insn->imm); 2447 if (BPF_CLASS(insn->code) == BPF_ALU64) {
2448 __mark_reg_known(regs + insn->dst_reg,
2449 insn->imm);
2450 } else {
2451 __mark_reg_known(regs + insn->dst_reg,
2452 (u32)insn->imm);
2453 }
2404 } 2454 }
2405 2455
2406 } else if (opcode > BPF_END) { 2456 } else if (opcode > BPF_END) {
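
For a 32-bit BPF_MOV with an immediate, the known value is now recorded as the zero-extended 32-bit immediate rather than the sign-extended 64-bit one, matching the zero-extension the interpreter and JITs perform for BPF_ALU ops. The difference only shows up for negative immediates:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        int32_t imm = -1;

        /* Old bookkeeping: the immediate was widened as a signed 64-bit value. */
        uint64_t as_s64 = (uint64_t)(int64_t)imm;        /* 0xffffffffffffffff */
        /* New bookkeeping: a 32-bit mov zero-extends into the 64-bit register. */
        uint64_t as_u32 = (uint32_t)imm;                 /* 0x00000000ffffffff */

        printf("sign-extended: 0x%llx\n", (unsigned long long)as_s64);
        printf("zero-extended: 0x%llx\n", (unsigned long long)as_u32);
        return 0;
}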
@@ -3431,15 +3481,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
3431 return range_within(rold, rcur) && 3481 return range_within(rold, rcur) &&
3432 tnum_in(rold->var_off, rcur->var_off); 3482 tnum_in(rold->var_off, rcur->var_off);
3433 } else { 3483 } else {
3434 /* if we knew anything about the old value, we're not 3484 /* We're trying to use a pointer in place of a scalar.
3435 * equal, because we can't know anything about the 3485 * Even if the scalar was unbounded, this could lead to
3436 * scalar value of the pointer in the new value. 3486 * pointer leaks because scalars are allowed to leak
3487 * while pointers are not. We could make this safe in
3488 * special cases if root is calling us, but it's
3489 * probably not worth the hassle.
3437 */ 3490 */
3438 return rold->umin_value == 0 && 3491 return false;
3439 rold->umax_value == U64_MAX &&
3440 rold->smin_value == S64_MIN &&
3441 rold->smax_value == S64_MAX &&
3442 tnum_is_unknown(rold->var_off);
3443 } 3492 }
3444 case PTR_TO_MAP_VALUE: 3493 case PTR_TO_MAP_VALUE:
3445 /* If the new min/max/var_off satisfy the old ones and 3494 /* If the new min/max/var_off satisfy the old ones and
diff --git a/kernel/fork.c b/kernel/fork.c
index 432eadf6b58c..2295fc69717f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
721 goto out; 721 goto out;
722 } 722 }
723 /* a new mm has just been created */ 723 /* a new mm has just been created */
724 arch_dup_mmap(oldmm, mm); 724 retval = arch_dup_mmap(oldmm, mm);
725 retval = 0;
726out: 725out:
727 up_write(&mm->mmap_sem); 726 up_write(&mm->mmap_sem);
728 flush_tlb_mm(oldmm); 727 flush_tlb_mm(oldmm);
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index aa8812ae6776..9e9748089270 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -435,6 +435,41 @@ loop:
435 return 0; 435 return 0;
436} 436}
437 437
438static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self)
439{
440 struct bpf_insn *insn;
441
442 insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL);
443 if (!insn)
444 return -ENOMEM;
445
446 /* Due to func address being non-const, we need to
447 * assemble this here.
448 */
449 insn[0] = BPF_MOV64_REG(R6, R1);
450 insn[1] = BPF_LD_ABS(BPF_B, 0);
451 insn[2] = BPF_LD_ABS(BPF_H, 0);
452 insn[3] = BPF_LD_ABS(BPF_W, 0);
453 insn[4] = BPF_MOV64_REG(R7, R6);
454 insn[5] = BPF_MOV64_IMM(R6, 0);
455 insn[6] = BPF_MOV64_REG(R1, R7);
456 insn[7] = BPF_MOV64_IMM(R2, 1);
457 insn[8] = BPF_MOV64_IMM(R3, 2);
458 insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
459 bpf_skb_vlan_push_proto.func - __bpf_call_base);
460 insn[10] = BPF_MOV64_REG(R6, R7);
461 insn[11] = BPF_LD_ABS(BPF_B, 0);
462 insn[12] = BPF_LD_ABS(BPF_H, 0);
463 insn[13] = BPF_LD_ABS(BPF_W, 0);
464 insn[14] = BPF_MOV64_IMM(R0, 42);
465 insn[15] = BPF_EXIT_INSN();
466
467 self->u.ptr.insns = insn;
468 self->u.ptr.len = 16;
469
470 return 0;
471}
472
438static int bpf_fill_jump_around_ld_abs(struct bpf_test *self) 473static int bpf_fill_jump_around_ld_abs(struct bpf_test *self)
439{ 474{
440 unsigned int len = BPF_MAXINSNS; 475 unsigned int len = BPF_MAXINSNS;
@@ -6066,6 +6101,14 @@ static struct bpf_test tests[] = {
6066 {}, 6101 {},
6067 { {0x1, 0x42 } }, 6102 { {0x1, 0x42 } },
6068 }, 6103 },
6104 {
6105 "LD_ABS with helper changing skb data",
6106 { },
6107 INTERNAL,
6108 { 0x34 },
6109 { { ETH_HLEN, 42 } },
6110 .fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
6111 },
6069}; 6112};
6070 6113
6071static struct net_device dev; 6114static struct net_device dev;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 84b2dc76f140..b5f940ce0143 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -882,13 +882,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
882 if (IS_ERR(dev)) 882 if (IS_ERR(dev))
883 return PTR_ERR(dev); 883 return PTR_ERR(dev);
884 884
885 if (bdi_debug_register(bdi, dev_name(dev))) {
886 device_destroy(bdi_class, dev->devt);
887 return -ENOMEM;
888 }
889 cgwb_bdi_register(bdi); 885 cgwb_bdi_register(bdi);
890 bdi->dev = dev; 886 bdi->dev = dev;
891 887
888 bdi_debug_register(bdi, dev_name(dev));
892 set_bit(WB_registered, &bdi->wb.state); 889 set_bit(WB_registered, &bdi->wb.state);
893 890
894 spin_lock_bh(&bdi_lock); 891 spin_lock_bh(&bdi_lock);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index d0ef0a8e8831..015f465c514b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
1262 struct net_bridge *br = netdev_priv(dev); 1262 struct net_bridge *br = netdev_priv(dev);
1263 int err; 1263 int err;
1264 1264
1265 err = register_netdevice(dev);
1266 if (err)
1267 return err;
1268
1265 if (tb[IFLA_ADDRESS]) { 1269 if (tb[IFLA_ADDRESS]) {
1266 spin_lock_bh(&br->lock); 1270 spin_lock_bh(&br->lock);
1267 br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); 1271 br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
1268 spin_unlock_bh(&br->lock); 1272 spin_unlock_bh(&br->lock);
1269 } 1273 }
1270 1274
1271 err = register_netdevice(dev);
1272 if (err)
1273 return err;
1274
1275 err = br_changelink(dev, tb, data, extack); 1275 err = br_changelink(dev, tb, data, extack);
1276 if (err) 1276 if (err)
1277 unregister_netdevice(dev); 1277 br_dev_delete(dev, NULL);
1278
1278 return err; 1279 return err;
1279} 1280}
1280 1281
diff --git a/net/core/dev.c b/net/core/dev.c
index f47e96b62308..01ee854454a8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3904,7 +3904,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3904 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, 3904 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
3905 troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) 3905 troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
3906 goto do_drop; 3906 goto do_drop;
3907 if (troom > 0 && __skb_linearize(skb)) 3907 if (skb_linearize(skb))
3908 goto do_drop; 3908 goto do_drop;
3909 } 3909 }
3910 3910
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b797832565d3..60a71be75aea 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
267 spin_lock_bh(&net->nsid_lock); 267 spin_lock_bh(&net->nsid_lock);
268 peer = idr_find(&net->netns_ids, id); 268 peer = idr_find(&net->netns_ids, id);
269 if (peer) 269 if (peer)
270 get_net(peer); 270 peer = maybe_get_net(peer);
271 spin_unlock_bh(&net->nsid_lock); 271 spin_unlock_bh(&net->nsid_lock);
272 rcu_read_unlock(); 272 rcu_read_unlock();
273 273
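
get_net_ns_by_id() switches to maybe_get_net() so a namespace whose refcount has already hit zero, but which is still findable in the idr while teardown runs, is not handed out with a fresh reference. The underlying pattern is "take a reference only if one still exists"; a minimal standalone sketch of that idea using C11 atomics (this is not the kernel's refcount_t API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
        atomic_int refs;
};

/* Succeed only if the object still holds at least one live reference;
 * never turn 0 back into 1 behind the releaser's back.
 */
static bool maybe_get(struct obj *o)
{
        int old = atomic_load(&o->refs);

        do {
                if (old == 0)
                        return false;
        } while (!atomic_compare_exchange_weak(&o->refs, &old, old + 1));
        return true;
}

int main(void)
{
        struct obj live = { .refs = 2 }, dying = { .refs = 0 };

        printf("live:  %d\n", maybe_get(&live));        /* 1, refs now 3 */
        printf("dying: %d\n", maybe_get(&dying));       /* 0, left alone */
        return 0;
}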
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a592ca025fc4..a3cb0be4c6f3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1178,7 +1178,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
1178 u32 d_off; 1178 u32 d_off;
1179 1179
1180 if (!num_frags) 1180 if (!num_frags)
1181 return 0; 1181 goto release;
1182 1182
1183 if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) 1183 if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
1184 return -EINVAL; 1184 return -EINVAL;
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
1238 __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); 1238 __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
1239 skb_shinfo(skb)->nr_frags = new_frags; 1239 skb_shinfo(skb)->nr_frags = new_frags;
1240 1240
1241release:
1241 skb_zcopy_clear(skb, false); 1242 skb_zcopy_clear(skb, false);
1242 return 0; 1243 return 0;
1243} 1244}
@@ -3654,8 +3655,6 @@ normal:
3654 3655
3655 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & 3656 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
3656 SKBTX_SHARED_FRAG; 3657 SKBTX_SHARED_FRAG;
3657 if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
3658 goto err;
3659 3658
3660 while (pos < offset + len) { 3659 while (pos < offset + len) {
3661 if (i >= nfrags) { 3660 if (i >= nfrags) {
@@ -3681,6 +3680,8 @@ normal:
3681 3680
3682 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) 3681 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
3683 goto err; 3682 goto err;
3683 if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
3684 goto err;
3684 3685
3685 *nskb_frag = *frag; 3686 *nskb_frag = *frag;
3686 __skb_frag_ref(nskb_frag); 3687 __skb_frag_ref(nskb_frag);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f52d27a422c3..08259d078b1c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1298,14 +1298,19 @@ err_table_hash_alloc:
1298 1298
1299static void ip_fib_net_exit(struct net *net) 1299static void ip_fib_net_exit(struct net *net)
1300{ 1300{
1301 unsigned int i; 1301 int i;
1302 1302
1303 rtnl_lock(); 1303 rtnl_lock();
1304#ifdef CONFIG_IP_MULTIPLE_TABLES 1304#ifdef CONFIG_IP_MULTIPLE_TABLES
1305 RCU_INIT_POINTER(net->ipv4.fib_main, NULL); 1305 RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
1306 RCU_INIT_POINTER(net->ipv4.fib_default, NULL); 1306 RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
1307#endif 1307#endif
1308 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1308 /* Destroy the tables in reverse order to guarantee that the
1309 * local table, ID 255, is destroyed before the main table, ID
1310 * 254. This is necessary as the local table may contain
1311 * references to data contained in the main table.
1312 */
1313 for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
1309 struct hlist_head *head = &net->ipv4.fib_table_hash[i]; 1314 struct hlist_head *head = &net->ipv4.fib_table_hash[i];
1310 struct hlist_node *tmp; 1315 struct hlist_node *tmp;
1311 struct fib_table *tb; 1316 struct fib_table *tb;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f04d944f8abe..c586597da20d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
698 698
699 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 699 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
700 int type = nla_type(nla); 700 int type = nla_type(nla);
701 u32 val; 701 u32 fi_val, val;
702 702
703 if (!type) 703 if (!type)
704 continue; 704 continue;
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
715 val = nla_get_u32(nla); 715 val = nla_get_u32(nla);
716 } 716 }
717 717
718 if (fi->fib_metrics->metrics[type - 1] != val) 718 fi_val = fi->fib_metrics->metrics[type - 1];
719 if (type == RTAX_FEATURES)
720 fi_val &= ~DST_FEATURE_ECN_CA;
721
722 if (fi_val != val)
719 return false; 723 return false;
720 } 724 }
721 725
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9c1735632c8c..45ffd3d045d2 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = {
1310static void ipgre_tap_setup(struct net_device *dev) 1310static void ipgre_tap_setup(struct net_device *dev)
1311{ 1311{
1312 ether_setup(dev); 1312 ether_setup(dev);
1313 dev->max_mtu = 0;
1313 dev->netdev_ops = &gre_tap_netdev_ops; 1314 dev->netdev_ops = &gre_tap_netdev_ops;
1314 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1315 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1315 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1316 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c26f71234b9c..c9441ca45399 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,6 @@ lookup_protocol:
210 np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; 210 np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
211 np->mc_loop = 1; 211 np->mc_loop = 1;
212 np->pmtudisc = IPV6_PMTUDISC_WANT; 212 np->pmtudisc = IPV6_PMTUDISC_WANT;
213 np->autoflowlabel = ip6_default_np_autolabel(net);
214 np->repflow = net->ipv6.sysctl.flowlabel_reflect; 213 np->repflow = net->ipv6.sysctl.flowlabel_reflect;
215 sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; 214 sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
216 215
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4cfd8e0696fe..416c8913f132 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1308,6 +1308,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
1308 1308
1309 ether_setup(dev); 1309 ether_setup(dev);
1310 1310
1311 dev->max_mtu = 0;
1311 dev->netdev_ops = &ip6gre_tap_netdev_ops; 1312 dev->netdev_ops = &ip6gre_tap_netdev_ops;
1312 dev->needs_free_netdev = true; 1313 dev->needs_free_netdev = true;
1313 dev->priv_destructor = ip6gre_dev_free; 1314 dev->priv_destructor = ip6gre_dev_free;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5110a418cc4d..f7dd51c42314 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
166 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 166 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
167} 167}
168 168
169static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
170{
171 if (!np->autoflowlabel_set)
172 return ip6_default_np_autolabel(net);
173 else
174 return np->autoflowlabel;
175}
176
169/* 177/*
170 * xmit an sk_buff (used by TCP, SCTP and DCCP) 178 * xmit an sk_buff (used by TCP, SCTP and DCCP)
171 * Note : socket lock is not held for SYNACK packets, but might be modified 179 * Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
230 hlimit = ip6_dst_hoplimit(dst); 238 hlimit = ip6_dst_hoplimit(dst);
231 239
232 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, 240 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
233 np->autoflowlabel, fl6)); 241 ip6_autoflowlabel(net, np), fl6));
234 242
235 hdr->payload_len = htons(seg_len); 243 hdr->payload_len = htons(seg_len);
236 hdr->nexthdr = proto; 244 hdr->nexthdr = proto;
@@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
1626 1634
1627 ip6_flow_hdr(hdr, v6_cork->tclass, 1635 ip6_flow_hdr(hdr, v6_cork->tclass,
1628 ip6_make_flowlabel(net, skb, fl6->flowlabel, 1636 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1629 np->autoflowlabel, fl6)); 1637 ip6_autoflowlabel(net, np), fl6));
1630 hdr->hop_limit = v6_cork->hop_limit; 1638 hdr->hop_limit = v6_cork->hop_limit;
1631 hdr->nexthdr = proto; 1639 hdr->nexthdr = proto;
1632 hdr->saddr = fl6->saddr; 1640 hdr->saddr = fl6->saddr;
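
ip6_autoflowlabel() makes an explicit IPV6_AUTOFLOWLABEL setsockopt (which now also sets autoflowlabel_set, see the ipv6_sockglue.c hunk below) override the per-namespace default, while sockets that never touched the option keep tracking the sysctl even if it changes after the socket is created. The same fallback pattern, reduced to a standalone sketch with assumed field names:

#include <stdbool.h>
#include <stdio.h>

/* Reduced stand-ins for struct ipv6_pinfo and the namespace sysctl. */
struct sock_opts {
        bool autoflowlabel;        /* value chosen via setsockopt */
        bool autoflowlabel_set;    /* has setsockopt been called at all? */
};

static bool ns_default_autoflowlabel = true;        /* ip6_default_np_autolabel(net) */

static bool effective_autoflowlabel(const struct sock_opts *np)
{
        /* An explicit per-socket choice wins; otherwise follow the namespace default. */
        return np->autoflowlabel_set ? np->autoflowlabel : ns_default_autoflowlabel;
}

int main(void)
{
        struct sock_opts untouched = { false, false };
        struct sock_opts opted_out = { false, true };

        printf("untouched: %d\n", effective_autoflowlabel(&untouched));        /* 1 */
        ns_default_autoflowlabel = false;
        printf("untouched after sysctl change: %d\n",
               effective_autoflowlabel(&untouched));                           /* 0 */
        printf("explicitly disabled: %d\n", effective_autoflowlabel(&opted_out)); /* 0 */
        return 0;
}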
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index db84f523656d..931c38f6ff4a 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1123,8 +1123,13 @@ route_lookup:
1123 max_headroom += 8; 1123 max_headroom += 8;
1124 mtu -= 8; 1124 mtu -= 8;
1125 } 1125 }
1126 if (mtu < IPV6_MIN_MTU) 1126 if (skb->protocol == htons(ETH_P_IPV6)) {
1127 mtu = IPV6_MIN_MTU; 1127 if (mtu < IPV6_MIN_MTU)
1128 mtu = IPV6_MIN_MTU;
1129 } else if (mtu < 576) {
1130 mtu = 576;
1131 }
1132
1128 if (skb_dst(skb) && !t->parms.collect_md) 1133 if (skb_dst(skb) && !t->parms.collect_md)
1129 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 1134 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
1130 if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { 1135 if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b9404feabd78..2d4680e0376f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -886,6 +886,7 @@ pref_skip_coa:
886 break; 886 break;
887 case IPV6_AUTOFLOWLABEL: 887 case IPV6_AUTOFLOWLABEL:
888 np->autoflowlabel = valbool; 888 np->autoflowlabel = valbool;
889 np->autoflowlabel_set = 1;
889 retv = 0; 890 retv = 0;
890 break; 891 break;
891 case IPV6_RECVFRAGSIZE: 892 case IPV6_RECVFRAGSIZE:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7a8d1500d374..0458b761f3c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2336 } 2336 }
2337 2337
2338 rt->dst.flags |= DST_HOST; 2338 rt->dst.flags |= DST_HOST;
2339 rt->dst.input = ip6_input;
2339 rt->dst.output = ip6_output; 2340 rt->dst.output = ip6_output;
2340 rt->rt6i_gateway = fl6->daddr; 2341 rt->rt6i_gateway = fl6->daddr;
2341 rt->rt6i_dst.addr = fl6->daddr; 2342 rt->rt6i_dst.addr = fl6->daddr;
@@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4297 if (!ipv6_addr_any(&fl6.saddr)) 4298 if (!ipv6_addr_any(&fl6.saddr))
4298 flags |= RT6_LOOKUP_F_HAS_SADDR; 4299 flags |= RT6_LOOKUP_F_HAS_SADDR;
4299 4300
4300 if (!fibmatch) 4301 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
4301 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
4302 else
4303 dst = ip6_route_lookup(net, &fl6, 0);
4304 4302
4305 rcu_read_unlock(); 4303 rcu_read_unlock();
4306 } else { 4304 } else {
4307 fl6.flowi6_oif = oif; 4305 fl6.flowi6_oif = oif;
4308 4306
4309 if (!fibmatch) 4307 dst = ip6_route_output(net, NULL, &fl6);
4310 dst = ip6_route_output(net, NULL, &fl6);
4311 else
4312 dst = ip6_route_lookup(net, &fl6, 0);
4313 } 4308 }
4314 4309
4315 4310
@@ -4326,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4326 goto errout; 4321 goto errout;
4327 } 4322 }
4328 4323
4324 if (fibmatch && rt->dst.from) {
4325 struct rt6_info *ort = container_of(rt->dst.from,
4326 struct rt6_info, dst);
4327
4328 dst_hold(&ort->dst);
4329 ip6_rt_put(rt);
4330 rt = ort;
4331 }
4332
4329 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 4333 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
4330 if (!skb) { 4334 if (!skb) {
4331 ip6_rt_put(rt); 4335 ip6_rt_put(rt);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dbe2379329c5..f039064ce922 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
579 return -EINVAL; 579 return -EINVAL;
580 580
581 skb_reset_network_header(skb); 581 skb_reset_network_header(skb);
582 key->eth.type = skb->protocol;
582 } else { 583 } else {
583 eth = eth_hdr(skb); 584 eth = eth_hdr(skb);
584 ether_addr_copy(key->eth.src, eth->h_source); 585 ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
592 if (unlikely(parse_vlan(skb, key))) 593 if (unlikely(parse_vlan(skb, key)))
593 return -ENOMEM; 594 return -ENOMEM;
594 595
595 skb->protocol = parse_ethertype(skb); 596 key->eth.type = parse_ethertype(skb);
596 if (unlikely(skb->protocol == htons(0))) 597 if (unlikely(key->eth.type == htons(0)))
597 return -ENOMEM; 598 return -ENOMEM;
598 599
600 /* Multiple tagged packets need to retain TPID to satisfy
601 * skb_vlan_pop(), which will later shift the ethertype into
602 * skb->protocol.
603 */
604 if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
605 skb->protocol = key->eth.cvlan.tpid;
606 else
607 skb->protocol = key->eth.type;
608
599 skb_reset_network_header(skb); 609 skb_reset_network_header(skb);
600 __skb_push(skb, skb->data - skb_mac_header(skb)); 610 __skb_push(skb, skb->data - skb_mac_header(skb));
601 } 611 }
602 skb_reset_mac_len(skb); 612 skb_reset_mac_len(skb);
603 key->eth.type = skb->protocol;
604 613
605 /* Network layer. */ 614 /* Network layer. */
606 if (key->eth.type == htons(ETH_P_IP)) { 615 if (key->eth.type == htons(ETH_P_IP)) {
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6fe798c2df1a..8d78e7f4ecc3 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
42 struct list_head link; 42 struct list_head link;
43 struct tcf_result res; 43 struct tcf_result res;
44 bool exts_integrated; 44 bool exts_integrated;
45 bool offloaded;
46 u32 gen_flags; 45 u32 gen_flags;
47 struct tcf_exts exts; 46 struct tcf_exts exts;
48 u32 handle; 47 u32 handle;
@@ -148,33 +147,37 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
148} 147}
149 148
150static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, 149static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
151 enum tc_clsbpf_command cmd) 150 struct cls_bpf_prog *oldprog)
152{ 151{
153 bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
154 struct tcf_block *block = tp->chain->block; 152 struct tcf_block *block = tp->chain->block;
155 bool skip_sw = tc_skip_sw(prog->gen_flags);
156 struct tc_cls_bpf_offload cls_bpf = {}; 153 struct tc_cls_bpf_offload cls_bpf = {};
154 struct cls_bpf_prog *obj;
155 bool skip_sw;
157 int err; 156 int err;
158 157
158 skip_sw = prog && tc_skip_sw(prog->gen_flags);
159 obj = prog ?: oldprog;
160
159 tc_cls_common_offload_init(&cls_bpf.common, tp); 161 tc_cls_common_offload_init(&cls_bpf.common, tp);
160 cls_bpf.command = cmd; 162 cls_bpf.command = TC_CLSBPF_OFFLOAD;
161 cls_bpf.exts = &prog->exts; 163 cls_bpf.exts = &obj->exts;
162 cls_bpf.prog = prog->filter; 164 cls_bpf.prog = prog ? prog->filter : NULL;
163 cls_bpf.name = prog->bpf_name; 165 cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
164 cls_bpf.exts_integrated = prog->exts_integrated; 166 cls_bpf.name = obj->bpf_name;
165 cls_bpf.gen_flags = prog->gen_flags; 167 cls_bpf.exts_integrated = obj->exts_integrated;
168 cls_bpf.gen_flags = obj->gen_flags;
166 169
167 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); 170 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
168 if (addorrep) { 171 if (prog) {
169 if (err < 0) { 172 if (err < 0) {
170 cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); 173 cls_bpf_offload_cmd(tp, oldprog, prog);
171 return err; 174 return err;
172 } else if (err > 0) { 175 } else if (err > 0) {
173 prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; 176 prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
174 } 177 }
175 } 178 }
176 179
177 if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) 180 if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
178 return -EINVAL; 181 return -EINVAL;
179 182
180 return 0; 183 return 0;
@@ -183,38 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
183static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, 186static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
184 struct cls_bpf_prog *oldprog) 187 struct cls_bpf_prog *oldprog)
185{ 188{
186 struct cls_bpf_prog *obj = prog; 189 if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
187 enum tc_clsbpf_command cmd; 190 return -EINVAL;
188 bool skip_sw;
189 int ret;
190
191 skip_sw = tc_skip_sw(prog->gen_flags) ||
192 (oldprog && tc_skip_sw(oldprog->gen_flags));
193
194 if (oldprog && oldprog->offloaded) {
195 if (!tc_skip_hw(prog->gen_flags)) {
196 cmd = TC_CLSBPF_REPLACE;
197 } else if (!tc_skip_sw(prog->gen_flags)) {
198 obj = oldprog;
199 cmd = TC_CLSBPF_DESTROY;
200 } else {
201 return -EINVAL;
202 }
203 } else {
204 if (tc_skip_hw(prog->gen_flags))
205 return skip_sw ? -EINVAL : 0;
206 cmd = TC_CLSBPF_ADD;
207 }
208
209 ret = cls_bpf_offload_cmd(tp, obj, cmd);
210 if (ret)
211 return ret;
212 191
213 obj->offloaded = true; 192 if (prog && tc_skip_hw(prog->gen_flags))
214 if (oldprog) 193 prog = NULL;
215 oldprog->offloaded = false; 194 if (oldprog && tc_skip_hw(oldprog->gen_flags))
195 oldprog = NULL;
196 if (!prog && !oldprog)
197 return 0;
216 198
217 return 0; 199 return cls_bpf_offload_cmd(tp, prog, oldprog);
218} 200}
219 201
220static void cls_bpf_stop_offload(struct tcf_proto *tp, 202static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -222,25 +204,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
222{ 204{
223 int err; 205 int err;
224 206
225 if (!prog->offloaded) 207 err = cls_bpf_offload_cmd(tp, NULL, prog);
226 return; 208 if (err)
227
228 err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
229 if (err) {
230 pr_err("Stopping hardware offload failed: %d\n", err); 209 pr_err("Stopping hardware offload failed: %d\n", err);
231 return;
232 }
233
234 prog->offloaded = false;
235} 210}
236 211
237static void cls_bpf_offload_update_stats(struct tcf_proto *tp, 212static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
238 struct cls_bpf_prog *prog) 213 struct cls_bpf_prog *prog)
239{ 214{
240 if (!prog->offloaded) 215 struct tcf_block *block = tp->chain->block;
241 return; 216 struct tc_cls_bpf_offload cls_bpf = {};
217
218 tc_cls_common_offload_init(&cls_bpf.common, tp);
219 cls_bpf.command = TC_CLSBPF_STATS;
220 cls_bpf.exts = &prog->exts;
221 cls_bpf.prog = prog->filter;
222 cls_bpf.name = prog->bpf_name;
223 cls_bpf.exts_integrated = prog->exts_integrated;
224 cls_bpf.gen_flags = prog->gen_flags;
242 225
243 cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS); 226 tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
244} 227}
245 228
246static int cls_bpf_init(struct tcf_proto *tp) 229static int cls_bpf_init(struct tcf_proto *tp)
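
After this rework, add, replace and destroy are all expressed through the single TC_CLSBPF_OFFLOAD command, and the intent is carried by which of prog/oldprog is non-NULL rather than by separate ADD/REPLACE/DESTROY commands and the removed 'offloaded' flag. A standalone sketch of how a driver callback can decode that convention (illustrative only, plain C):

#include <stdio.h>

/* Which of prog/oldprog is non-NULL now encodes add, replace or destroy. */
static const char *decode_offload(const void *prog, const void *oldprog)
{
        if (prog && oldprog)
                return "replace";
        if (prog)
                return "add";
        if (oldprog)
                return "destroy";
        return "nothing to do";
}

int main(void)
{
        int new_filter = 0, old_filter = 0;

        printf("%s\n", decode_offload(&new_filter, NULL));         /* add */
        printf("%s\n", decode_offload(&new_filter, &old_filter));  /* replace */
        printf("%s\n", decode_offload(NULL, &old_filter));         /* destroy */
        return 0;
}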
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 3f619fdcbf0a..291c97b07058 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid)
78 case SCTP_CID_AUTH: 78 case SCTP_CID_AUTH:
79 return "AUTH"; 79 return "AUTH";
80 80
81 case SCTP_CID_RECONF:
82 return "RECONF";
83
81 default: 84 default:
82 break; 85 break;
83 } 86 }
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index a71be33f3afe..e36ec5dd64c6 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
1084void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, 1084void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
1085 gfp_t gfp) 1085 gfp_t gfp)
1086{ 1086{
1087 struct sctp_association *asoc; 1087 struct sctp_association *asoc = ulpq->asoc;
1088 __u16 needed, freed; 1088 __u32 freed = 0;
1089 1089 __u16 needed;
1090 asoc = ulpq->asoc;
1091 1090
1092 if (chunk) { 1091 needed = ntohs(chunk->chunk_hdr->length) -
1093 needed = ntohs(chunk->chunk_hdr->length); 1092 sizeof(struct sctp_data_chunk);
1094 needed -= sizeof(struct sctp_data_chunk);
1095 } else
1096 needed = SCTP_DEFAULT_MAXWINDOW;
1097
1098 freed = 0;
1099 1093
1100 if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) { 1094 if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
1101 freed = sctp_ulpq_renege_order(ulpq, needed); 1095 freed = sctp_ulpq_renege_order(ulpq, needed);
1102 if (freed < needed) { 1096 if (freed < needed)
1103 freed += sctp_ulpq_renege_frags(ulpq, needed - freed); 1097 freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
1104 }
1105 } 1098 }
1106 /* If able to free enough room, accept this chunk. */ 1099 /* If able to free enough room, accept this chunk. */
1107 if (chunk && (freed >= needed)) { 1100 if (freed >= needed) {
1108 int retval; 1101 int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
1109 retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
1110 /* 1102 /*
1111 * Enter partial delivery if chunk has not been 1103 * Enter partial delivery if chunk has not been
1112 * delivered; otherwise, drain the reassembly queue. 1104 * delivered; otherwise, drain the reassembly queue.
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 95fec2c057d6..7ebbdeb2a90e 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -351,8 +351,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
351 if (m->window >= ADV_IDLE) 351 if (m->window >= ADV_IDLE)
352 return; 352 return;
353 353
354 if (!list_empty(&m->congested)) 354 list_del_init(&m->congested);
355 return;
356 355
357 /* Sort member into congested members' list */ 356 /* Sort member into congested members' list */
358 list_for_each_entry_safe(_m, tmp, &grp->congested, congested) { 357 list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
@@ -648,6 +647,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
648 } else if (mtyp == GRP_REMIT_MSG) { 647 } else if (mtyp == GRP_REMIT_MSG) {
649 msg_set_grp_remitted(hdr, m->window); 648 msg_set_grp_remitted(hdr, m->window);
650 } 649 }
650 msg_set_dest_droppable(hdr, true);
651 __skb_queue_tail(xmitq, skb); 651 __skb_queue_tail(xmitq, skb);
652} 652}
653 653
@@ -689,15 +689,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
689 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); 689 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
690 __skb_queue_tail(inputq, m->event_msg); 690 __skb_queue_tail(inputq, m->event_msg);
691 } 691 }
692 if (m->window < ADV_IDLE) 692 list_del_init(&m->congested);
693 tipc_group_update_member(m, 0); 693 tipc_group_update_member(m, 0);
694 else
695 list_del_init(&m->congested);
696 return; 694 return;
697 case GRP_LEAVE_MSG: 695 case GRP_LEAVE_MSG:
698 if (!m) 696 if (!m)
699 return; 697 return;
700 m->bc_syncpt = msg_grp_bc_syncpt(hdr); 698 m->bc_syncpt = msg_grp_bc_syncpt(hdr);
699 list_del_init(&m->list);
700 list_del_init(&m->congested);
701 *usr_wakeup = true;
701 702
702 /* Wait until WITHDRAW event is received */ 703 /* Wait until WITHDRAW event is received */
703 if (m->state != MBR_LEAVING) { 704 if (m->state != MBR_LEAVING) {
@@ -709,8 +710,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
709 ehdr = buf_msg(m->event_msg); 710 ehdr = buf_msg(m->event_msg);
710 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); 711 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
711 __skb_queue_tail(inputq, m->event_msg); 712 __skb_queue_tail(inputq, m->event_msg);
712 *usr_wakeup = true;
713 list_del_init(&m->congested);
714 return; 713 return;
715 case GRP_ADV_MSG: 714 case GRP_ADV_MSG:
716 if (!m) 715 if (!m)
@@ -862,6 +861,7 @@ void tipc_group_member_evt(struct tipc_group *grp,
862 msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); 861 msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
863 __skb_queue_tail(inputq, skb); 862 __skb_queue_tail(inputq, skb);
864 } 863 }
864 list_del_init(&m->list);
865 list_del_init(&m->congested); 865 list_del_init(&m->congested);
866 } 866 }
867 *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); 867 *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
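
Editor's note: the net/tipc/group.c change replaces the early "already on the congested list" return with an unconditional list_del_init() followed by the sorted re-insert, so a member's position is refreshed whenever its window changes; the later hunks apply the same unlink at the LEAVE and member-event sites. A rough sketch of that unlink-then-reinsert-sorted pattern on a stand-in list (ascending-window ordering is an assumption here, and the types are not the tipc ones):

/* Sketch only: a minimal list stand-in, not the kernel list API. */
#include <stdio.h>

struct member {
    int window;
    struct member *next;          /* singly linked is enough for the idea */
};

/* remove m from the list if present (the unconditional list_del_init step) */
static void unlink_member(struct member **head, struct member *m)
{
    for (struct member **p = head; *p; p = &(*p)->next) {
        if (*p == m) {
            *p = m->next;
            m->next = NULL;
            return;
        }
    }
}

/* insert so that members with the smallest window come first (assumed order) */
static void insert_sorted(struct member **head, struct member *m)
{
    struct member **p = head;
    while (*p && (*p)->window <= m->window)
        p = &(*p)->next;
    m->next = *p;
    *p = m;
}

int main(void)
{
    struct member a = { .window = 5 }, b = { .window = 2 }, *head = NULL;
    insert_sorted(&head, &a);
    insert_sorted(&head, &b);
    a.window = 1;                 /* window changed: refresh the position */
    unlink_member(&head, &a);
    insert_sorted(&head, &a);
    for (struct member *m = head; m; m = m->next)
        printf("window=%d\n", m->window);
    return 0;
}
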
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index d7d6cb00c47b..1d84f91bbfb0 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -23,27 +23,14 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
23cfg80211-y += extra-certs.o 23cfg80211-y += extra-certs.o
24endif 24endif
25 25
26$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) 26$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
27 @$(kecho) " GEN $@" 27 @$(kecho) " GEN $@"
28 @(set -e; \ 28 @(echo '#include "reg.h"'; \
29 allf=""; \ 29 echo 'const u8 shipped_regdb_certs[] = {'; \
30 for f in $^ ; do \ 30 cat $^ ; \
31 # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \ 31 echo '};'; \
32 thisf=$$(od -An -v -tx1 < $$f | \ 32 echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
33 sed -e 's/ /\n/g' | \ 33 ) > $@
34 sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
35 sed -e 's/^/0x/;s/$$/,/'); \
36 # file should not be empty - maybe command substitution failed? \
37 test ! -z "$$thisf";\
38 allf=$$allf$$thisf;\
39 done; \
40 ( \
41 echo '#include "reg.h"'; \
42 echo 'const u8 shipped_regdb_certs[] = {'; \
43 echo "$$allf"; \
44 echo '};'; \
45 echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
46 ) >> $@)
47 34
48$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ 35$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
49 $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) 36 $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
@@ -66,4 +53,6 @@ $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
66 echo "$$allf"; \ 53 echo "$$allf"; \
67 echo '};'; \ 54 echo '};'; \
68 echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \ 55 echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
69 ) >> $@) 56 ) > $@)
57
58clean-files += shipped-certs.c extra-certs.c
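
Editor's note: with the Makefile change above, the regdb certificates are shipped as pre-formatted certs/*.hex fragments that are simply concatenated into the generated C file, instead of being converted from .x509 blobs with od/sed at build time. Roughly, the generated shipped-certs.c reduces to the sketch below (array truncated; "u8" is redeclared locally so the sketch stands alone, whereas the real file gets it via reg.h):

/* Rough shape of the generated shipped-certs.c; the real array is the
 * concatenation of every certs/*.hex fragment (see sforshee.hex below). */
#include <stdio.h>

typedef unsigned char u8;             /* stand-in for the kernel typedef */

const u8 shipped_regdb_certs[] = {
    0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,   /* first bytes of the cert */
    /* ... remaining bytes elided ... */
};
unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);

int main(void)
{
    printf("embedded cert bytes: %u\n", shipped_regdb_certs_len);
    return 0;
}
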
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex
new file mode 100644
index 000000000000..14ea66643ffa
--- /dev/null
+++ b/net/wireless/certs/sforshee.hex
@@ -0,0 +1,86 @@
1/* Seth Forshee's regdb certificate */
20x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
30x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae,
40xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a,
50x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b,
60x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f,
70x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73,
80x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30,
90x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30,
100x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a,
110x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39,
120x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35,
130x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06,
140x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66,
150x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82,
160x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86,
170x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05,
180x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82,
190x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5,
200x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2,
210x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac,
220x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c,
230x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38,
240x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d,
250x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20,
260x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b,
270x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57,
280x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b,
290x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51,
300x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a,
310xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18,
320x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98,
330xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1,
340x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28,
350x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71,
360x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a,
370xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85,
380xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30,
390x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7,
400x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65,
410x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3,
420x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18,
430x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36,
440x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1,
450x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96,
460x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c,
470x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11,
480x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7,
490xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6,
500xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0,
510x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02,
520x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09,
530x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
540x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
550x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf,
560x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93,
570x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7,
580x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9,
590x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3,
600x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec,
610xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0,
620x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3,
630x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4,
640x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32,
650xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74,
660x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22,
670x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86,
680x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c,
690xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06,
700xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1,
710x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58,
720xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4,
730xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72,
740x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79,
750xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a,
760xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f,
770x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47,
780xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a,
790x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28,
800xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2,
810xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87,
820x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d,
830x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc,
840x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16,
850x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f,
860x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14,
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
deleted file mode 100644
index c6f8f9d6b988..000000000000
--- a/net/wireless/certs/sforshee.x509
+++ /dev/null
Binary files differ
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b1ac23ca20c8..213d0c498c97 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2610 case NL80211_IFTYPE_AP: 2610 case NL80211_IFTYPE_AP:
2611 if (wdev->ssid_len && 2611 if (wdev->ssid_len &&
2612 nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) 2612 nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
2613 goto nla_put_failure; 2613 goto nla_put_failure_locked;
2614 break; 2614 break;
2615 case NL80211_IFTYPE_STATION: 2615 case NL80211_IFTYPE_STATION:
2616 case NL80211_IFTYPE_P2P_CLIENT: 2616 case NL80211_IFTYPE_P2P_CLIENT:
@@ -2623,7 +2623,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2623 if (!ssid_ie) 2623 if (!ssid_ie)
2624 break; 2624 break;
2625 if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2)) 2625 if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
2626 goto nla_put_failure; 2626 goto nla_put_failure_locked;
2627 break; 2627 break;
2628 } 2628 }
2629 default: 2629 default:
@@ -2635,6 +2635,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2635 genlmsg_end(msg, hdr); 2635 genlmsg_end(msg, hdr);
2636 return 0; 2636 return 0;
2637 2637
2638 nla_put_failure_locked:
2639 wdev_unlock(wdev);
2638 nla_put_failure: 2640 nla_put_failure:
2639 genlmsg_cancel(msg, hdr); 2641 genlmsg_cancel(msg, hdr);
2640 return -EMSGSIZE; 2642 return -EMSGSIZE;
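
Editor's note: the nl80211.c fix adds a second error label so that put failures which occur while wdev is still locked drop the lock before the message is cancelled, rather than leaking the lock. A generic sketch of that two-label unwind pattern, using a pthread mutex and placeholder helpers instead of the nl80211/wdev API:

/* Sketch only: illustrates the "locked" vs. plain error-label pattern. */
#include <stdio.h>
#include <pthread.h>

static pthread_mutex_t wdev_lock = PTHREAD_MUTEX_INITIALIZER;

static int build_part(int ok) { return ok ? 0 : -1; }   /* pretend a put can fail */
static void cancel_msg(void)  { printf("message cancelled\n"); }

static int send_iface(int part1_ok, int part2_ok)
{
    if (build_part(part1_ok))
        goto nla_put_failure;            /* lock not held on this path */

    pthread_mutex_lock(&wdev_lock);
    if (build_part(part2_ok))
        goto nla_put_failure_locked;     /* lock held: must drop it first */
    pthread_mutex_unlock(&wdev_lock);
    return 0;

nla_put_failure_locked:
    pthread_mutex_unlock(&wdev_lock);
nla_put_failure:
    cancel_msg();
    return -1;
}

int main(void)
{
    send_iface(1, 0);
    return 0;
}
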
diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
index cefe7c7cd4f6..0a8e37a519f2 100644
--- a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
+++ b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
@@ -2,7 +2,7 @@
2#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ 2#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
3#define _UAPI__ASM_BPF_PERF_EVENT_H__ 3#define _UAPI__ASM_BPF_PERF_EVENT_H__
4 4
5#include <asm/ptrace.h> 5#include "ptrace.h"
6 6
7typedef user_pt_regs bpf_user_pt_regs_t; 7typedef user_pt_regs bpf_user_pt_regs_t;
8 8
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 217cf6f95c36..a5684d0968b4 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -478,7 +478,7 @@ class Provider(object):
478 @staticmethod 478 @staticmethod
479 def is_field_wanted(fields_filter, field): 479 def is_field_wanted(fields_filter, field):
480 """Indicate whether field is valid according to fields_filter.""" 480 """Indicate whether field is valid according to fields_filter."""
481 if not fields_filter or fields_filter == "help": 481 if not fields_filter:
482 return True 482 return True
483 return re.match(fields_filter, field) is not None 483 return re.match(fields_filter, field) is not None
484 484
@@ -549,8 +549,8 @@ class TracepointProvider(Provider):
549 549
550 def update_fields(self, fields_filter): 550 def update_fields(self, fields_filter):
551 """Refresh fields, applying fields_filter""" 551 """Refresh fields, applying fields_filter"""
552 self._fields = [field for field in self.get_available_fields() 552 self.fields = [field for field in self.get_available_fields()
553 if self.is_field_wanted(fields_filter, field)] 553 if self.is_field_wanted(fields_filter, field)]
554 554
555 @staticmethod 555 @staticmethod
556 def get_online_cpus(): 556 def get_online_cpus():
@@ -950,7 +950,8 @@ class Tui(object):
950 curses.nocbreak() 950 curses.nocbreak()
951 curses.endwin() 951 curses.endwin()
952 952
953 def get_all_gnames(self): 953 @staticmethod
954 def get_all_gnames():
954 """Returns a list of (pid, gname) tuples of all running guests""" 955 """Returns a list of (pid, gname) tuples of all running guests"""
955 res = [] 956 res = []
956 try: 957 try:
@@ -963,7 +964,7 @@ class Tui(object):
963 # perform a sanity check before calling the more expensive 964 # perform a sanity check before calling the more expensive
964 # function to possibly extract the guest name 965 # function to possibly extract the guest name
965 if ' -name ' in line[1]: 966 if ' -name ' in line[1]:
966 res.append((line[0], self.get_gname_from_pid(line[0]))) 967 res.append((line[0], Tui.get_gname_from_pid(line[0])))
967 child.stdout.close() 968 child.stdout.close()
968 969
969 return res 970 return res
@@ -984,7 +985,8 @@ class Tui(object):
984 except Exception: 985 except Exception:
985 self.screen.addstr(row + 1, 2, 'Not available') 986 self.screen.addstr(row + 1, 2, 'Not available')
986 987
987 def get_pid_from_gname(self, gname): 988 @staticmethod
989 def get_pid_from_gname(gname):
988 """Fuzzy function to convert guest name to QEMU process pid. 990 """Fuzzy function to convert guest name to QEMU process pid.
989 991
990 Returns a list of potential pids, can be empty if no match found. 992 Returns a list of potential pids, can be empty if no match found.
@@ -992,7 +994,7 @@ class Tui(object):
992 994
993 """ 995 """
994 pids = [] 996 pids = []
995 for line in self.get_all_gnames(): 997 for line in Tui.get_all_gnames():
996 if gname == line[1]: 998 if gname == line[1]:
997 pids.append(int(line[0])) 999 pids.append(int(line[0]))
998 1000
@@ -1090,15 +1092,16 @@ class Tui(object):
1090 # sort by totals 1092 # sort by totals
1091 return (0, -stats[x][0]) 1093 return (0, -stats[x][0])
1092 total = 0. 1094 total = 0.
1093 for val in stats.values(): 1095 for key in stats.keys():
1094 total += val[0] 1096 if key.find('(') is -1:
1097 total += stats[key][0]
1095 if self._sorting == SORT_DEFAULT: 1098 if self._sorting == SORT_DEFAULT:
1096 sortkey = sortCurAvg 1099 sortkey = sortCurAvg
1097 else: 1100 else:
1098 sortkey = sortTotal 1101 sortkey = sortTotal
1102 tavg = 0
1099 for key in sorted(stats.keys(), key=sortkey): 1103 for key in sorted(stats.keys(), key=sortkey):
1100 1104 if row >= self.screen.getmaxyx()[0] - 1:
1101 if row >= self.screen.getmaxyx()[0]:
1102 break 1105 break
1103 values = stats[key] 1106 values = stats[key]
1104 if not values[0] and not values[1]: 1107 if not values[0] and not values[1]:
@@ -1110,9 +1113,15 @@ class Tui(object):
1110 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % 1113 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
1111 (key, values[0], values[0] * 100 / total, 1114 (key, values[0], values[0] * 100 / total,
1112 cur)) 1115 cur))
1116 if cur is not '' and key.find('(') is -1:
1117 tavg += cur
1113 row += 1 1118 row += 1
1114 if row == 3: 1119 if row == 3:
1115 self.screen.addstr(4, 1, 'No matching events reported yet') 1120 self.screen.addstr(4, 1, 'No matching events reported yet')
1121 else:
1122 self.screen.addstr(row, 1, '%-40s %10d %8s' %
1123 ('Total', total, tavg if tavg else ''),
1124 curses.A_BOLD)
1116 self.screen.refresh() 1125 self.screen.refresh()
1117 1126
1118 def show_msg(self, text): 1127 def show_msg(self, text):
@@ -1358,7 +1367,7 @@ class Tui(object):
1358 if char == 'x': 1367 if char == 'x':
1359 self.update_drilldown() 1368 self.update_drilldown()
1360 # prevents display of current values on next refresh 1369 # prevents display of current values on next refresh
1361 self.stats.get() 1370 self.stats.get(self._display_guests)
1362 except KeyboardInterrupt: 1371 except KeyboardInterrupt:
1363 break 1372 break
1364 except curses.error: 1373 except curses.error:
@@ -1451,16 +1460,13 @@ Press any other key to refresh statistics immediately.
1451 try: 1460 try:
1452 pids = Tui.get_pid_from_gname(val) 1461 pids = Tui.get_pid_from_gname(val)
1453 except: 1462 except:
1454 raise optparse.OptionValueError('Error while searching for guest ' 1463 sys.exit('Error while searching for guest "{}". Use "-p" to '
1455 '"{}", use "-p" to specify a pid ' 1464 'specify a pid instead?'.format(val))
1456 'instead'.format(val))
1457 if len(pids) == 0: 1465 if len(pids) == 0:
1458 raise optparse.OptionValueError('No guest by the name "{}" ' 1466 sys.exit('Error: No guest by the name "{}" found'.format(val))
1459 'found'.format(val))
1460 if len(pids) > 1: 1467 if len(pids) > 1:
1461 raise optparse.OptionValueError('Multiple processes found (pids: ' 1468 sys.exit('Error: Multiple processes found (pids: {}). Use "-p" '
1462 '{}) - use "-p" to specify a pid ' 1469 'to specify the desired pid'.format(" ".join(pids)))
1463 'instead'.format(" ".join(pids)))
1464 parser.values.pid = pids[0] 1470 parser.values.pid = pids[0]
1465 1471
1466 optparser = optparse.OptionParser(description=description_text, 1472 optparser = optparse.OptionParser(description=description_text,
@@ -1518,7 +1524,16 @@ Press any other key to refresh statistics immediately.
1518 help='restrict statistics to guest by name', 1524 help='restrict statistics to guest by name',
1519 callback=cb_guest_to_pid, 1525 callback=cb_guest_to_pid,
1520 ) 1526 )
1521 (options, _) = optparser.parse_args(sys.argv) 1527 options, unkn = optparser.parse_args(sys.argv)
1528 if len(unkn) != 1:
1529 sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:]))
1530 try:
1531 # verify that we were passed a valid regex up front
1532 re.compile(options.fields)
1533 except re.error:
1534 sys.exit('Error: "' + options.fields + '" is not a valid regular '
1535 'expression')
1536
1522 return options 1537 return options
1523 1538
1524 1539
@@ -1564,16 +1579,13 @@ def main():
1564 1579
1565 stats = Stats(options) 1580 stats = Stats(options)
1566 1581
1567 if options.fields == "help": 1582 if options.fields == 'help':
1568 event_list = "\n" 1583 stats.fields_filter = None
1569 s = stats.get() 1584 event_list = []
1570 for key in s.keys(): 1585 for key in stats.get().keys():
1571 if key.find('(') != -1: 1586 event_list.append(key.split('(', 1)[0])
1572 key = key[0:key.find('(')] 1587 sys.stdout.write(' ' + '\n '.join(sorted(set(event_list))) + '\n')
1573 if event_list.find('\n' + key + '\n') == -1: 1588 sys.exit(0)
1574 event_list += key + '\n'
1575 sys.stdout.write(event_list)
1576 return ""
1577 1589
1578 if options.log: 1590 if options.log:
1579 log(stats) 1591 log(stats)
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index e5cf836be8a1..b5b3810c9e94 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -50,6 +50,8 @@ INTERACTIVE COMMANDS
50*s*:: set update interval 50*s*:: set update interval
51 51
52*x*:: toggle reporting of stats for child trace events 52*x*:: toggle reporting of stats for child trace events
53 :: *Note*: The stats for the parents summarize the respective child trace
54 events
53 55
54Press any other key to refresh statistics immediately. 56Press any other key to refresh statistics immediately.
55 57
@@ -86,7 +88,7 @@ OPTIONS
86 88
87-f<fields>:: 89-f<fields>::
88--fields=<fields>:: 90--fields=<fields>::
89 fields to display (regex) 91 fields to display (regex), "-f help" for a list of available events
90 92
91-h:: 93-h::
92--help:: 94--help::
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 792af7c3b74f..05fc4e2e7b3a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -11,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),)
11endif 11endif
12 12
13CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include 13CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
14LDLIBS += -lcap -lelf 14LDLIBS += -lcap -lelf -lrt
15 15
16TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ 16TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
17 test_align test_verifier_log test_dev_cgroup 17 test_align test_verifier_log test_dev_cgroup
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 69427531408d..6761be18a91f 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -351,7 +351,7 @@ static void test_bpf_obj_id(void)
351 info_len != sizeof(struct bpf_map_info) || 351 info_len != sizeof(struct bpf_map_info) ||
352 strcmp((char *)map_infos[i].name, expected_map_name), 352 strcmp((char *)map_infos[i].name, expected_map_name),
353 "get-map-info(fd)", 353 "get-map-info(fd)",
354 "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", 354 "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
355 err, errno, 355 err, errno,
356 map_infos[i].type, BPF_MAP_TYPE_ARRAY, 356 map_infos[i].type, BPF_MAP_TYPE_ARRAY,
357 info_len, sizeof(struct bpf_map_info), 357 info_len, sizeof(struct bpf_map_info),
@@ -395,7 +395,7 @@ static void test_bpf_obj_id(void)
395 *(int *)prog_infos[i].map_ids != map_infos[i].id || 395 *(int *)prog_infos[i].map_ids != map_infos[i].id ||
396 strcmp((char *)prog_infos[i].name, expected_prog_name), 396 strcmp((char *)prog_infos[i].name, expected_prog_name),
397 "get-prog-info(fd)", 397 "get-prog-info(fd)",
398 "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", 398 "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
399 err, errno, i, 399 err, errno, i,
400 prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, 400 prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
401 info_len, sizeof(struct bpf_prog_info), 401 info_len, sizeof(struct bpf_prog_info),
@@ -463,7 +463,7 @@ static void test_bpf_obj_id(void)
463 memcmp(&prog_info, &prog_infos[i], info_len) || 463 memcmp(&prog_info, &prog_infos[i], info_len) ||
464 *(int *)prog_info.map_ids != saved_map_id, 464 *(int *)prog_info.map_ids != saved_map_id,
465 "get-prog-info(next_id->fd)", 465 "get-prog-info(next_id->fd)",
466 "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n", 466 "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
467 err, errno, info_len, sizeof(struct bpf_prog_info), 467 err, errno, info_len, sizeof(struct bpf_prog_info),
468 memcmp(&prog_info, &prog_infos[i], info_len), 468 memcmp(&prog_info, &prog_infos[i], info_len),
469 *(int *)prog_info.map_ids, saved_map_id); 469 *(int *)prog_info.map_ids, saved_map_id);
@@ -509,7 +509,7 @@ static void test_bpf_obj_id(void)
509 memcmp(&map_info, &map_infos[i], info_len) || 509 memcmp(&map_info, &map_infos[i], info_len) ||
510 array_value != array_magic_value, 510 array_value != array_magic_value,
511 "check get-map-info(next_id->fd)", 511 "check get-map-info(next_id->fd)",
512 "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n", 512 "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
513 err, errno, info_len, sizeof(struct bpf_map_info), 513 err, errno, info_len, sizeof(struct bpf_map_info),
514 memcmp(&map_info, &map_infos[i], info_len), 514 memcmp(&map_info, &map_infos[i], info_len),
515 array_value, array_magic_value); 515 array_value, array_magic_value);
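
Editor's note: the format-string changes above (%lu -> %Zu) are about printing sizeof(...) values: sizeof yields a size_t, which %lu mis-declares on 32-bit targets. %Zu is the old GNU spelling of the length modifier; standard C spells it %zu, as in this small sketch (the struct is a stand-in, not the real bpf_map_info):

/* Sketch only: portable printing of a size_t value. */
#include <stdio.h>

struct bpf_map_info_stub { int type; unsigned int key_size; };  /* stand-in */

int main(void)
{
    printf("info_len %zu\n", sizeof(struct bpf_map_info_stub));
    return 0;
}
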
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3c64f30cf63c..b51017404c62 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -422,9 +422,7 @@ static struct bpf_test tests[] = {
422 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), 422 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
423 BPF_EXIT_INSN(), 423 BPF_EXIT_INSN(),
424 }, 424 },
425 .errstr_unpriv = "R1 subtraction from stack pointer", 425 .errstr = "R1 subtraction from stack pointer",
426 .result_unpriv = REJECT,
427 .errstr = "R1 invalid mem access",
428 .result = REJECT, 426 .result = REJECT,
429 }, 427 },
430 { 428 {
@@ -606,7 +604,6 @@ static struct bpf_test tests[] = {
606 }, 604 },
607 .errstr = "misaligned stack access", 605 .errstr = "misaligned stack access",
608 .result = REJECT, 606 .result = REJECT,
609 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
610 }, 607 },
611 { 608 {
612 "invalid map_fd for function call", 609 "invalid map_fd for function call",
@@ -1797,7 +1794,6 @@ static struct bpf_test tests[] = {
1797 }, 1794 },
1798 .result = REJECT, 1795 .result = REJECT,
1799 .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8", 1796 .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
1800 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
1801 }, 1797 },
1802 { 1798 {
1803 "PTR_TO_STACK store/load - bad alignment on reg", 1799 "PTR_TO_STACK store/load - bad alignment on reg",
@@ -1810,7 +1806,6 @@ static struct bpf_test tests[] = {
1810 }, 1806 },
1811 .result = REJECT, 1807 .result = REJECT,
1812 .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8", 1808 .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
1813 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
1814 }, 1809 },
1815 { 1810 {
1816 "PTR_TO_STACK store/load - out of bounds low", 1811 "PTR_TO_STACK store/load - out of bounds low",
@@ -1862,9 +1857,8 @@ static struct bpf_test tests[] = {
1862 BPF_MOV64_IMM(BPF_REG_0, 0), 1857 BPF_MOV64_IMM(BPF_REG_0, 0),
1863 BPF_EXIT_INSN(), 1858 BPF_EXIT_INSN(),
1864 }, 1859 },
1865 .result = ACCEPT, 1860 .result = REJECT,
1866 .result_unpriv = REJECT, 1861 .errstr = "R1 pointer += pointer",
1867 .errstr_unpriv = "R1 pointer += pointer",
1868 }, 1862 },
1869 { 1863 {
1870 "unpriv: neg pointer", 1864 "unpriv: neg pointer",
@@ -2592,7 +2586,8 @@ static struct bpf_test tests[] = {
2592 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 2586 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
2593 offsetof(struct __sk_buff, data)), 2587 offsetof(struct __sk_buff, data)),
2594 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4), 2588 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
2595 BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), 2589 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
2590 offsetof(struct __sk_buff, len)),
2596 BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49), 2591 BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
2597 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49), 2592 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
2598 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), 2593 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
@@ -2899,7 +2894,7 @@ static struct bpf_test tests[] = {
2899 BPF_MOV64_IMM(BPF_REG_0, 0), 2894 BPF_MOV64_IMM(BPF_REG_0, 0),
2900 BPF_EXIT_INSN(), 2895 BPF_EXIT_INSN(),
2901 }, 2896 },
2902 .errstr = "invalid access to packet", 2897 .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
2903 .result = REJECT, 2898 .result = REJECT,
2904 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 2899 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
2905 }, 2900 },
@@ -3885,9 +3880,7 @@ static struct bpf_test tests[] = {
3885 BPF_EXIT_INSN(), 3880 BPF_EXIT_INSN(),
3886 }, 3881 },
3887 .fixup_map2 = { 3, 11 }, 3882 .fixup_map2 = { 3, 11 },
3888 .errstr_unpriv = "R0 pointer += pointer", 3883 .errstr = "R0 pointer += pointer",
3889 .errstr = "R0 invalid mem access 'inv'",
3890 .result_unpriv = REJECT,
3891 .result = REJECT, 3884 .result = REJECT,
3892 .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, 3885 .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
3893 }, 3886 },
@@ -3928,7 +3921,7 @@ static struct bpf_test tests[] = {
3928 BPF_EXIT_INSN(), 3921 BPF_EXIT_INSN(),
3929 }, 3922 },
3930 .fixup_map1 = { 4 }, 3923 .fixup_map1 = { 4 },
3931 .errstr = "R4 invalid mem access", 3924 .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
3932 .result = REJECT, 3925 .result = REJECT,
3933 .prog_type = BPF_PROG_TYPE_SCHED_CLS 3926 .prog_type = BPF_PROG_TYPE_SCHED_CLS
3934 }, 3927 },
@@ -3949,7 +3942,7 @@ static struct bpf_test tests[] = {
3949 BPF_EXIT_INSN(), 3942 BPF_EXIT_INSN(),
3950 }, 3943 },
3951 .fixup_map1 = { 4 }, 3944 .fixup_map1 = { 4 },
3952 .errstr = "R4 invalid mem access", 3945 .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
3953 .result = REJECT, 3946 .result = REJECT,
3954 .prog_type = BPF_PROG_TYPE_SCHED_CLS 3947 .prog_type = BPF_PROG_TYPE_SCHED_CLS
3955 }, 3948 },
@@ -3970,7 +3963,7 @@ static struct bpf_test tests[] = {
3970 BPF_EXIT_INSN(), 3963 BPF_EXIT_INSN(),
3971 }, 3964 },
3972 .fixup_map1 = { 4 }, 3965 .fixup_map1 = { 4 },
3973 .errstr = "R4 invalid mem access", 3966 .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
3974 .result = REJECT, 3967 .result = REJECT,
3975 .prog_type = BPF_PROG_TYPE_SCHED_CLS 3968 .prog_type = BPF_PROG_TYPE_SCHED_CLS
3976 }, 3969 },
@@ -5195,10 +5188,8 @@ static struct bpf_test tests[] = {
5195 BPF_EXIT_INSN(), 5188 BPF_EXIT_INSN(),
5196 }, 5189 },
5197 .fixup_map2 = { 3 }, 5190 .fixup_map2 = { 3 },
5198 .errstr_unpriv = "R0 bitwise operator &= on pointer", 5191 .errstr = "R0 bitwise operator &= on pointer",
5199 .errstr = "invalid mem access 'inv'",
5200 .result = REJECT, 5192 .result = REJECT,
5201 .result_unpriv = REJECT,
5202 }, 5193 },
5203 { 5194 {
5204 "map element value illegal alu op, 2", 5195 "map element value illegal alu op, 2",
@@ -5214,10 +5205,8 @@ static struct bpf_test tests[] = {
5214 BPF_EXIT_INSN(), 5205 BPF_EXIT_INSN(),
5215 }, 5206 },
5216 .fixup_map2 = { 3 }, 5207 .fixup_map2 = { 3 },
5217 .errstr_unpriv = "R0 32-bit pointer arithmetic prohibited", 5208 .errstr = "R0 32-bit pointer arithmetic prohibited",
5218 .errstr = "invalid mem access 'inv'",
5219 .result = REJECT, 5209 .result = REJECT,
5220 .result_unpriv = REJECT,
5221 }, 5210 },
5222 { 5211 {
5223 "map element value illegal alu op, 3", 5212 "map element value illegal alu op, 3",
@@ -5233,10 +5222,8 @@ static struct bpf_test tests[] = {
5233 BPF_EXIT_INSN(), 5222 BPF_EXIT_INSN(),
5234 }, 5223 },
5235 .fixup_map2 = { 3 }, 5224 .fixup_map2 = { 3 },
5236 .errstr_unpriv = "R0 pointer arithmetic with /= operator", 5225 .errstr = "R0 pointer arithmetic with /= operator",
5237 .errstr = "invalid mem access 'inv'",
5238 .result = REJECT, 5226 .result = REJECT,
5239 .result_unpriv = REJECT,
5240 }, 5227 },
5241 { 5228 {
5242 "map element value illegal alu op, 4", 5229 "map element value illegal alu op, 4",
@@ -6019,8 +6006,7 @@ static struct bpf_test tests[] = {
6019 BPF_EXIT_INSN(), 6006 BPF_EXIT_INSN(),
6020 }, 6007 },
6021 .fixup_map_in_map = { 3 }, 6008 .fixup_map_in_map = { 3 },
6022 .errstr = "R1 type=inv expected=map_ptr", 6009 .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
6023 .errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
6024 .result = REJECT, 6010 .result = REJECT,
6025 }, 6011 },
6026 { 6012 {
@@ -6117,6 +6103,30 @@ static struct bpf_test tests[] = {
6117 .result = ACCEPT, 6103 .result = ACCEPT,
6118 }, 6104 },
6119 { 6105 {
6106 "ld_abs: tests on r6 and skb data reload helper",
6107 .insns = {
6108 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
6109 BPF_LD_ABS(BPF_B, 0),
6110 BPF_LD_ABS(BPF_H, 0),
6111 BPF_LD_ABS(BPF_W, 0),
6112 BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
6113 BPF_MOV64_IMM(BPF_REG_6, 0),
6114 BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
6115 BPF_MOV64_IMM(BPF_REG_2, 1),
6116 BPF_MOV64_IMM(BPF_REG_3, 2),
6117 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6118 BPF_FUNC_skb_vlan_push),
6119 BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
6120 BPF_LD_ABS(BPF_B, 0),
6121 BPF_LD_ABS(BPF_H, 0),
6122 BPF_LD_ABS(BPF_W, 0),
6123 BPF_MOV64_IMM(BPF_REG_0, 42),
6124 BPF_EXIT_INSN(),
6125 },
6126 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
6127 .result = ACCEPT,
6128 },
6129 {
6120 "ld_ind: check calling conv, r1", 6130 "ld_ind: check calling conv, r1",
6121 .insns = { 6131 .insns = {
6122 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), 6132 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
@@ -6300,7 +6310,7 @@ static struct bpf_test tests[] = {
6300 BPF_EXIT_INSN(), 6310 BPF_EXIT_INSN(),
6301 }, 6311 },
6302 .fixup_map1 = { 3 }, 6312 .fixup_map1 = { 3 },
6303 .errstr = "R0 min value is negative", 6313 .errstr = "unbounded min value",
6304 .result = REJECT, 6314 .result = REJECT,
6305 }, 6315 },
6306 { 6316 {
@@ -6324,7 +6334,7 @@ static struct bpf_test tests[] = {
6324 BPF_EXIT_INSN(), 6334 BPF_EXIT_INSN(),
6325 }, 6335 },
6326 .fixup_map1 = { 3 }, 6336 .fixup_map1 = { 3 },
6327 .errstr = "R0 min value is negative", 6337 .errstr = "unbounded min value",
6328 .result = REJECT, 6338 .result = REJECT,
6329 }, 6339 },
6330 { 6340 {
@@ -6350,7 +6360,7 @@ static struct bpf_test tests[] = {
6350 BPF_EXIT_INSN(), 6360 BPF_EXIT_INSN(),
6351 }, 6361 },
6352 .fixup_map1 = { 3 }, 6362 .fixup_map1 = { 3 },
6353 .errstr = "R8 invalid mem access 'inv'", 6363 .errstr = "unbounded min value",
6354 .result = REJECT, 6364 .result = REJECT,
6355 }, 6365 },
6356 { 6366 {
@@ -6375,7 +6385,7 @@ static struct bpf_test tests[] = {
6375 BPF_EXIT_INSN(), 6385 BPF_EXIT_INSN(),
6376 }, 6386 },
6377 .fixup_map1 = { 3 }, 6387 .fixup_map1 = { 3 },
6378 .errstr = "R8 invalid mem access 'inv'", 6388 .errstr = "unbounded min value",
6379 .result = REJECT, 6389 .result = REJECT,
6380 }, 6390 },
6381 { 6391 {
@@ -6423,7 +6433,7 @@ static struct bpf_test tests[] = {
6423 BPF_EXIT_INSN(), 6433 BPF_EXIT_INSN(),
6424 }, 6434 },
6425 .fixup_map1 = { 3 }, 6435 .fixup_map1 = { 3 },
6426 .errstr = "R0 min value is negative", 6436 .errstr = "unbounded min value",
6427 .result = REJECT, 6437 .result = REJECT,
6428 }, 6438 },
6429 { 6439 {
@@ -6494,7 +6504,7 @@ static struct bpf_test tests[] = {
6494 BPF_EXIT_INSN(), 6504 BPF_EXIT_INSN(),
6495 }, 6505 },
6496 .fixup_map1 = { 3 }, 6506 .fixup_map1 = { 3 },
6497 .errstr = "R0 min value is negative", 6507 .errstr = "unbounded min value",
6498 .result = REJECT, 6508 .result = REJECT,
6499 }, 6509 },
6500 { 6510 {
@@ -6545,7 +6555,7 @@ static struct bpf_test tests[] = {
6545 BPF_EXIT_INSN(), 6555 BPF_EXIT_INSN(),
6546 }, 6556 },
6547 .fixup_map1 = { 3 }, 6557 .fixup_map1 = { 3 },
6548 .errstr = "R0 min value is negative", 6558 .errstr = "unbounded min value",
6549 .result = REJECT, 6559 .result = REJECT,
6550 }, 6560 },
6551 { 6561 {
@@ -6572,7 +6582,7 @@ static struct bpf_test tests[] = {
6572 BPF_EXIT_INSN(), 6582 BPF_EXIT_INSN(),
6573 }, 6583 },
6574 .fixup_map1 = { 3 }, 6584 .fixup_map1 = { 3 },
6575 .errstr = "R0 min value is negative", 6585 .errstr = "unbounded min value",
6576 .result = REJECT, 6586 .result = REJECT,
6577 }, 6587 },
6578 { 6588 {
@@ -6598,7 +6608,7 @@ static struct bpf_test tests[] = {
6598 BPF_EXIT_INSN(), 6608 BPF_EXIT_INSN(),
6599 }, 6609 },
6600 .fixup_map1 = { 3 }, 6610 .fixup_map1 = { 3 },
6601 .errstr = "R0 min value is negative", 6611 .errstr = "unbounded min value",
6602 .result = REJECT, 6612 .result = REJECT,
6603 }, 6613 },
6604 { 6614 {
@@ -6627,7 +6637,7 @@ static struct bpf_test tests[] = {
6627 BPF_EXIT_INSN(), 6637 BPF_EXIT_INSN(),
6628 }, 6638 },
6629 .fixup_map1 = { 3 }, 6639 .fixup_map1 = { 3 },
6630 .errstr = "R0 min value is negative", 6640 .errstr = "unbounded min value",
6631 .result = REJECT, 6641 .result = REJECT,
6632 }, 6642 },
6633 { 6643 {
@@ -6657,7 +6667,7 @@ static struct bpf_test tests[] = {
6657 BPF_JMP_IMM(BPF_JA, 0, 0, -7), 6667 BPF_JMP_IMM(BPF_JA, 0, 0, -7),
6658 }, 6668 },
6659 .fixup_map1 = { 4 }, 6669 .fixup_map1 = { 4 },
6660 .errstr = "R0 min value is negative", 6670 .errstr = "unbounded min value",
6661 .result = REJECT, 6671 .result = REJECT,
6662 }, 6672 },
6663 { 6673 {
@@ -6685,8 +6695,7 @@ static struct bpf_test tests[] = {
6685 BPF_EXIT_INSN(), 6695 BPF_EXIT_INSN(),
6686 }, 6696 },
6687 .fixup_map1 = { 3 }, 6697 .fixup_map1 = { 3 },
6688 .errstr_unpriv = "R0 pointer comparison prohibited", 6698 .errstr = "unbounded min value",
6689 .errstr = "R0 min value is negative",
6690 .result = REJECT, 6699 .result = REJECT,
6691 .result_unpriv = REJECT, 6700 .result_unpriv = REJECT,
6692 }, 6701 },
@@ -6742,6 +6751,462 @@ static struct bpf_test tests[] = {
6742 .result = REJECT, 6751 .result = REJECT,
6743 }, 6752 },
6744 { 6753 {
6754 "bounds check based on zero-extended MOV",
6755 .insns = {
6756 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6757 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6758 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6759 BPF_LD_MAP_FD(BPF_REG_1, 0),
6760 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6761 BPF_FUNC_map_lookup_elem),
6762 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6763 /* r2 = 0x0000'0000'ffff'ffff */
6764 BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
6765 /* r2 = 0 */
6766 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
6767 /* no-op */
6768 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
6769 /* access at offset 0 */
6770 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6771 /* exit */
6772 BPF_MOV64_IMM(BPF_REG_0, 0),
6773 BPF_EXIT_INSN(),
6774 },
6775 .fixup_map1 = { 3 },
6776 .result = ACCEPT
6777 },
6778 {
6779 "bounds check based on sign-extended MOV. test1",
6780 .insns = {
6781 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6782 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6783 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6784 BPF_LD_MAP_FD(BPF_REG_1, 0),
6785 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6786 BPF_FUNC_map_lookup_elem),
6787 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6788 /* r2 = 0xffff'ffff'ffff'ffff */
6789 BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
6790 /* r2 = 0xffff'ffff */
6791 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
6792 /* r0 = <oob pointer> */
6793 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
6794 /* access to OOB pointer */
6795 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6796 /* exit */
6797 BPF_MOV64_IMM(BPF_REG_0, 0),
6798 BPF_EXIT_INSN(),
6799 },
6800 .fixup_map1 = { 3 },
6801 .errstr = "map_value pointer and 4294967295",
6802 .result = REJECT
6803 },
6804 {
6805 "bounds check based on sign-extended MOV. test2",
6806 .insns = {
6807 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6808 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6809 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6810 BPF_LD_MAP_FD(BPF_REG_1, 0),
6811 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6812 BPF_FUNC_map_lookup_elem),
6813 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6814 /* r2 = 0xffff'ffff'ffff'ffff */
6815 BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
6816 /* r2 = 0xfff'ffff */
6817 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
6818 /* r0 = <oob pointer> */
6819 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
6820 /* access to OOB pointer */
6821 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6822 /* exit */
6823 BPF_MOV64_IMM(BPF_REG_0, 0),
6824 BPF_EXIT_INSN(),
6825 },
6826 .fixup_map1 = { 3 },
6827 .errstr = "R0 min value is outside of the array range",
6828 .result = REJECT
6829 },
6830 {
6831 "bounds check based on reg_off + var_off + insn_off. test1",
6832 .insns = {
6833 BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
6834 offsetof(struct __sk_buff, mark)),
6835 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6836 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6837 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6838 BPF_LD_MAP_FD(BPF_REG_1, 0),
6839 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6840 BPF_FUNC_map_lookup_elem),
6841 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6842 BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
6843 BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
6844 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
6845 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
6846 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
6847 BPF_MOV64_IMM(BPF_REG_0, 0),
6848 BPF_EXIT_INSN(),
6849 },
6850 .fixup_map1 = { 4 },
6851 .errstr = "value_size=8 off=1073741825",
6852 .result = REJECT,
6853 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
6854 },
6855 {
6856 "bounds check based on reg_off + var_off + insn_off. test2",
6857 .insns = {
6858 BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
6859 offsetof(struct __sk_buff, mark)),
6860 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6861 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6862 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6863 BPF_LD_MAP_FD(BPF_REG_1, 0),
6864 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6865 BPF_FUNC_map_lookup_elem),
6866 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6867 BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
6868 BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
6869 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
6870 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
6871 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
6872 BPF_MOV64_IMM(BPF_REG_0, 0),
6873 BPF_EXIT_INSN(),
6874 },
6875 .fixup_map1 = { 4 },
6876 .errstr = "value 1073741823",
6877 .result = REJECT,
6878 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
6879 },
6880 {
6881 "bounds check after truncation of non-boundary-crossing range",
6882 .insns = {
6883 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6884 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6885 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6886 BPF_LD_MAP_FD(BPF_REG_1, 0),
6887 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6888 BPF_FUNC_map_lookup_elem),
6889 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
6890 /* r1 = [0x00, 0xff] */
6891 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
6892 BPF_MOV64_IMM(BPF_REG_2, 1),
6893 /* r2 = 0x10'0000'0000 */
6894 BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
6895 /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
6896 BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
6897 /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
6898 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
6899 /* r1 = [0x00, 0xff] */
6900 BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
6901 /* r1 = 0 */
6902 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
6903 /* no-op */
6904 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
6905 /* access at offset 0 */
6906 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6907 /* exit */
6908 BPF_MOV64_IMM(BPF_REG_0, 0),
6909 BPF_EXIT_INSN(),
6910 },
6911 .fixup_map1 = { 3 },
6912 .result = ACCEPT
6913 },
6914 {
6915 "bounds check after truncation of boundary-crossing range (1)",
6916 .insns = {
6917 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6918 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6919 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6920 BPF_LD_MAP_FD(BPF_REG_1, 0),
6921 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6922 BPF_FUNC_map_lookup_elem),
6923 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
6924 /* r1 = [0x00, 0xff] */
6925 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
6926 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6927 /* r1 = [0xffff'ff80, 0x1'0000'007f] */
6928 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6929 /* r1 = [0xffff'ff80, 0xffff'ffff] or
6930 * [0x0000'0000, 0x0000'007f]
6931 */
6932 BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
6933 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6934 /* r1 = [0x00, 0xff] or
6935 * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
6936 */
6937 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6938 /* r1 = 0 or
6939 * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
6940 */
6941 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
6942 /* no-op or OOB pointer computation */
6943 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
6944 /* potentially OOB access */
6945 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6946 /* exit */
6947 BPF_MOV64_IMM(BPF_REG_0, 0),
6948 BPF_EXIT_INSN(),
6949 },
6950 .fixup_map1 = { 3 },
6951 /* not actually fully unbounded, but the bound is very high */
6952 .errstr = "R0 unbounded memory access",
6953 .result = REJECT
6954 },
6955 {
6956 "bounds check after truncation of boundary-crossing range (2)",
6957 .insns = {
6958 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6959 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6960 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6961 BPF_LD_MAP_FD(BPF_REG_1, 0),
6962 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6963 BPF_FUNC_map_lookup_elem),
6964 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
6965 /* r1 = [0x00, 0xff] */
6966 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
6967 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6968 /* r1 = [0xffff'ff80, 0x1'0000'007f] */
6969 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6970 /* r1 = [0xffff'ff80, 0xffff'ffff] or
6971 * [0x0000'0000, 0x0000'007f]
6972 * difference to previous test: truncation via MOV32
6973 * instead of ALU32.
6974 */
6975 BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
6976 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6977 /* r1 = [0x00, 0xff] or
6978 * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
6979 */
6980 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6981 /* r1 = 0 or
6982 * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
6983 */
6984 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
6985 /* no-op or OOB pointer computation */
6986 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
6987 /* potentially OOB access */
6988 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6989 /* exit */
6990 BPF_MOV64_IMM(BPF_REG_0, 0),
6991 BPF_EXIT_INSN(),
6992 },
6993 .fixup_map1 = { 3 },
6994 /* not actually fully unbounded, but the bound is very high */
6995 .errstr = "R0 unbounded memory access",
6996 .result = REJECT
6997 },
6998 {
6999 "bounds check after wrapping 32-bit addition",
7000 .insns = {
7001 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7002 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7003 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7004 BPF_LD_MAP_FD(BPF_REG_1, 0),
7005 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7006 BPF_FUNC_map_lookup_elem),
7007 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
7008 /* r1 = 0x7fff'ffff */
7009 BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
7010 /* r1 = 0xffff'fffe */
7011 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
7012 /* r1 = 0 */
7013 BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
7014 /* no-op */
7015 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7016 /* access at offset 0 */
7017 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
7018 /* exit */
7019 BPF_MOV64_IMM(BPF_REG_0, 0),
7020 BPF_EXIT_INSN(),
7021 },
7022 .fixup_map1 = { 3 },
7023 .result = ACCEPT
7024 },
7025 {
7026 "bounds check after shift with oversized count operand",
7027 .insns = {
7028 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7029 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7030 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7031 BPF_LD_MAP_FD(BPF_REG_1, 0),
7032 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7033 BPF_FUNC_map_lookup_elem),
7034 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
7035 BPF_MOV64_IMM(BPF_REG_2, 32),
7036 BPF_MOV64_IMM(BPF_REG_1, 1),
7037 /* r1 = (u32)1 << (u32)32 = ? */
7038 BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
7039 /* r1 = [0x0000, 0xffff] */
7040 BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
7041 /* computes unknown pointer, potentially OOB */
7042 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7043 /* potentially OOB access */
7044 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
7045 /* exit */
7046 BPF_MOV64_IMM(BPF_REG_0, 0),
7047 BPF_EXIT_INSN(),
7048 },
7049 .fixup_map1 = { 3 },
7050 .errstr = "R0 max value is outside of the array range",
7051 .result = REJECT
7052 },
7053 {
7054 "bounds check after right shift of maybe-negative number",
7055 .insns = {
7056 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7057 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7058 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7059 BPF_LD_MAP_FD(BPF_REG_1, 0),
7060 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7061 BPF_FUNC_map_lookup_elem),
7062 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
7063 /* r1 = [0x00, 0xff] */
7064 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
7065 /* r1 = [-0x01, 0xfe] */
7066 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
7067 /* r1 = 0 or 0xff'ffff'ffff'ffff */
7068 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
7069 /* r1 = 0 or 0xffff'ffff'ffff */
7070 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
7071 /* computes unknown pointer, potentially OOB */
7072 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7073 /* potentially OOB access */
7074 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
7075 /* exit */
7076 BPF_MOV64_IMM(BPF_REG_0, 0),
7077 BPF_EXIT_INSN(),
7078 },
7079 .fixup_map1 = { 3 },
7080 .errstr = "R0 unbounded memory access",
7081 .result = REJECT
7082 },
7083 {
7084 "bounds check map access with off+size signed 32bit overflow. test1",
7085 .insns = {
7086 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7087 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7088 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7089 BPF_LD_MAP_FD(BPF_REG_1, 0),
7090 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7091 BPF_FUNC_map_lookup_elem),
7092 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7093 BPF_EXIT_INSN(),
7094 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
7095 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7096 BPF_JMP_A(0),
7097 BPF_EXIT_INSN(),
7098 },
7099 .fixup_map1 = { 3 },
7100 .errstr = "map_value pointer and 2147483646",
7101 .result = REJECT
7102 },
7103 {
7104 "bounds check map access with off+size signed 32bit overflow. test2",
7105 .insns = {
7106 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7107 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7108 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7109 BPF_LD_MAP_FD(BPF_REG_1, 0),
7110 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7111 BPF_FUNC_map_lookup_elem),
7112 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7113 BPF_EXIT_INSN(),
7114 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
7115 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
7116 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
7117 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7118 BPF_JMP_A(0),
7119 BPF_EXIT_INSN(),
7120 },
7121 .fixup_map1 = { 3 },
7122 .errstr = "pointer offset 1073741822",
7123 .result = REJECT
7124 },
7125 {
7126 "bounds check map access with off+size signed 32bit overflow. test3",
7127 .insns = {
7128 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7129 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7130 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7131 BPF_LD_MAP_FD(BPF_REG_1, 0),
7132 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7133 BPF_FUNC_map_lookup_elem),
7134 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7135 BPF_EXIT_INSN(),
7136 BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
7137 BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
7138 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
7139 BPF_JMP_A(0),
7140 BPF_EXIT_INSN(),
7141 },
7142 .fixup_map1 = { 3 },
7143 .errstr = "pointer offset -1073741822",
7144 .result = REJECT
7145 },
7146 {
7147 "bounds check map access with off+size signed 32bit overflow. test4",
7148 .insns = {
7149 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7150 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7151 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7152 BPF_LD_MAP_FD(BPF_REG_1, 0),
7153 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7154 BPF_FUNC_map_lookup_elem),
7155 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7156 BPF_EXIT_INSN(),
7157 BPF_MOV64_IMM(BPF_REG_1, 1000000),
7158 BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
7159 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7160 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
7161 BPF_JMP_A(0),
7162 BPF_EXIT_INSN(),
7163 },
7164 .fixup_map1 = { 3 },
7165 .errstr = "map_value pointer and 1000000000000",
7166 .result = REJECT
7167 },
7168 {
7169 "pointer/scalar confusion in state equality check (way 1)",
7170 .insns = {
7171 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7172 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7173 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7174 BPF_LD_MAP_FD(BPF_REG_1, 0),
7175 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7176 BPF_FUNC_map_lookup_elem),
7177 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
7178 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7179 BPF_JMP_A(1),
7180 BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
7181 BPF_JMP_A(0),
7182 BPF_EXIT_INSN(),
7183 },
7184 .fixup_map1 = { 3 },
7185 .result = ACCEPT,
7186 .result_unpriv = REJECT,
7187 .errstr_unpriv = "R0 leaks addr as return value"
7188 },
7189 {
7190 "pointer/scalar confusion in state equality check (way 2)",
7191 .insns = {
7192 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7193 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7194 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7195 BPF_LD_MAP_FD(BPF_REG_1, 0),
7196 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7197 BPF_FUNC_map_lookup_elem),
7198 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
7199 BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
7200 BPF_JMP_A(1),
7201 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7202 BPF_EXIT_INSN(),
7203 },
7204 .fixup_map1 = { 3 },
7205 .result = ACCEPT,
7206 .result_unpriv = REJECT,
7207 .errstr_unpriv = "R0 leaks addr as return value"
7208 },
7209 {
6745 "variable-offset ctx access", 7210 "variable-offset ctx access",
6746 .insns = { 7211 .insns = {
6747 /* Get an unknown value */ 7212 /* Get an unknown value */
@@ -6783,6 +7248,71 @@ static struct bpf_test tests[] = {
6783 .prog_type = BPF_PROG_TYPE_LWT_IN, 7248 .prog_type = BPF_PROG_TYPE_LWT_IN,
6784 }, 7249 },
6785 { 7250 {
7251 "indirect variable-offset stack access",
7252 .insns = {
7253 /* Fill the top 8 bytes of the stack */
7254 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7255 /* Get an unknown value */
7256 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
7257 /* Make it small and 4-byte aligned */
7258 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
7259 BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
7260 /* add it to fp. We now have either fp-4 or fp-8, but
7261 * we don't know which
7262 */
7263 BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
7264 /* dereference it indirectly */
7265 BPF_LD_MAP_FD(BPF_REG_1, 0),
7266 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7267 BPF_FUNC_map_lookup_elem),
7268 BPF_MOV64_IMM(BPF_REG_0, 0),
7269 BPF_EXIT_INSN(),
7270 },
7271 .fixup_map1 = { 5 },
7272 .errstr = "variable stack read R2",
7273 .result = REJECT,
7274 .prog_type = BPF_PROG_TYPE_LWT_IN,
7275 },
7276 {
7277 "direct stack access with 32-bit wraparound. test1",
7278 .insns = {
7279 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
7280 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
7281 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
7282 BPF_MOV32_IMM(BPF_REG_0, 0),
7283 BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
7284 BPF_EXIT_INSN()
7285 },
7286 .errstr = "fp pointer and 2147483647",
7287 .result = REJECT
7288 },
7289 {
7290 "direct stack access with 32-bit wraparound. test2",
7291 .insns = {
7292 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
7293 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
7294 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
7295 BPF_MOV32_IMM(BPF_REG_0, 0),
7296 BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
7297 BPF_EXIT_INSN()
7298 },
7299 .errstr = "fp pointer and 1073741823",
7300 .result = REJECT
7301 },
7302 {
7303 "direct stack access with 32-bit wraparound. test3",
7304 .insns = {
7305 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
7306 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
7307 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
7308 BPF_MOV32_IMM(BPF_REG_0, 0),
7309 BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
7310 BPF_EXIT_INSN()
7311 },
7312 .errstr = "fp pointer offset 1073741822",
7313 .result = REJECT
7314 },
7315 {
6786 "liveness pruning and write screening", 7316 "liveness pruning and write screening",
6787 .insns = { 7317 .insns = {
6788 /* Get an unknown value */ 7318 /* Get an unknown value */
@@ -7104,6 +7634,19 @@ static struct bpf_test tests[] = {
7104 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 7634 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
7105 }, 7635 },
7106 { 7636 {
7637 "pkt_end - pkt_start is allowed",
7638 .insns = {
7639 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
7640 offsetof(struct __sk_buff, data_end)),
7641 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
7642 offsetof(struct __sk_buff, data)),
7643 BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
7644 BPF_EXIT_INSN(),
7645 },
7646 .result = ACCEPT,
7647 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
7648 },
7649 {
7107 "XDP pkt read, pkt_end mangling, bad access 1", 7650 "XDP pkt read, pkt_end mangling, bad access 1",
7108 .insns = { 7651 .insns = {
7109 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 7652 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -7118,7 +7661,7 @@ static struct bpf_test tests[] = {
7118 BPF_MOV64_IMM(BPF_REG_0, 0), 7661 BPF_MOV64_IMM(BPF_REG_0, 0),
7119 BPF_EXIT_INSN(), 7662 BPF_EXIT_INSN(),
7120 }, 7663 },
7121 .errstr = "R1 offset is outside of the packet", 7664 .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
7122 .result = REJECT, 7665 .result = REJECT,
7123 .prog_type = BPF_PROG_TYPE_XDP, 7666 .prog_type = BPF_PROG_TYPE_XDP,
7124 }, 7667 },
@@ -7137,7 +7680,7 @@ static struct bpf_test tests[] = {
7137 BPF_MOV64_IMM(BPF_REG_0, 0), 7680 BPF_MOV64_IMM(BPF_REG_0, 0),
7138 BPF_EXIT_INSN(), 7681 BPF_EXIT_INSN(),
7139 }, 7682 },
7140 .errstr = "R1 offset is outside of the packet", 7683 .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
7141 .result = REJECT, 7684 .result = REJECT,
7142 .prog_type = BPF_PROG_TYPE_XDP, 7685 .prog_type = BPF_PROG_TYPE_XDP,
7143 }, 7686 },
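
Editor's note: several of the new verifier tests above hinge on 32-bit ALU results wrapping and being zero-extended, e.g. "bounds check after wrapping 32-bit addition". A tiny worked example of that arithmetic in plain C (not BPF):

/* 0x7fffffff + 0x7fffffff + 2 wraps to 0 in 32 bits, so the pointer
 * addition that follows in the test is a no-op and the access is safe. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t r1 = 0x7fffffff;
    r1 += 0x7fffffff;        /* 0xfffffffe */
    r1 += 2;                 /* wraps to 0 */
    printf("r1 = %u\n", r1); /* prints 0: the offset added to r0 is zero */

    /* the oversized-shift test is different: a 32-bit shift by 32 is
     * undefined in C and unknown to the verifier, hence the rejection */
    return 0;
}
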
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index e57b4ac40e72..7177bea1fdfa 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,3 +1,4 @@
1CONFIG_USER_NS=y 1CONFIG_USER_NS=y
2CONFIG_BPF_SYSCALL=y 2CONFIG_BPF_SYSCALL=y
3CONFIG_TEST_BPF=m 3CONFIG_TEST_BPF=m
4CONFIG_NUMA=y
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 66e5ce5b91f0..0304ffb714f2 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -627,13 +627,10 @@ static void do_multicpu_tests(void)
627static int finish_exec_test(void) 627static int finish_exec_test(void)
628{ 628{
629 /* 629 /*
630 * In a sensible world, this would be check_invalid_segment(0, 1); 630 * Older kernel versions did inherit the LDT on exec() which is
631 * For better or for worse, though, the LDT is inherited across exec. 631 * wrong because exec() starts from a clean state.
632 * We can probably change this safely, but for now we test it.
633 */ 632 */
634 check_valid_segment(0, 1, 633 check_invalid_segment(0, 1);
635 AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
636 42, true);
637 634
638 return nerrs ? 1 : 0; 635 return nerrs ? 1 : 0;
639} 636}
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index f9555b1e7f15..cc29a8148328 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -92,16 +92,23 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
 	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
 	struct arch_timer_context *vtimer;
+	u32 cnt_ctl;
 
-	if (!vcpu) {
-		pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
-		return IRQ_NONE;
-	}
-	vtimer = vcpu_vtimer(vcpu);
+	/*
+	 * We may see a timer interrupt after vcpu_put() has been called which
+	 * sets the CPU's vcpu pointer to NULL, because even though the timer
+	 * has been disabled in vtimer_save_state(), the hardware interrupt
+	 * signal may not have been retired from the interrupt controller yet.
+	 */
+	if (!vcpu)
+		return IRQ_HANDLED;
 
+	vtimer = vcpu_vtimer(vcpu);
 	if (!vtimer->irq.level) {
-		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-		if (kvm_timer_irq_can_fire(vtimer))
+		cnt_ctl = read_sysreg_el0(cntv_ctl);
+		cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
+			   ARCH_TIMER_CTRL_IT_MASK;
+		if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
 			kvm_timer_update_irq(vcpu, true, vtimer);
 	}
 
@@ -355,6 +362,7 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
 
 	/* Disable the virtual timer */
 	write_sysreg_el0(0, cntv_ctl);
+	isb();
 
 	vtimer->loaded = false;
 out:
@@ -720,7 +728,7 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
 	return 0;
 }
 
-int kvm_timer_hyp_init(void)
+int kvm_timer_hyp_init(bool has_gic)
 {
 	struct arch_timer_kvm_info *info;
 	int err;
@@ -756,10 +764,13 @@ int kvm_timer_hyp_init(void)
 		return err;
 	}
 
-	err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus());
-	if (err) {
-		kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
-		goto out_free_irq;
+	if (has_gic) {
+		err = irq_set_vcpu_affinity(host_vtimer_irq,
+					    kvm_get_running_vcpus());
+		if (err) {
+			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
+			goto out_free_irq;
+		}
 	}
 
 	kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
@@ -835,10 +846,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 no_vgic:
 	preempt_disable();
 	timer->enabled = 1;
-	if (!irqchip_in_kernel(vcpu->kvm))
-		kvm_timer_vcpu_load_user(vcpu);
-	else
-		kvm_timer_vcpu_load_vgic(vcpu);
+	kvm_timer_vcpu_load(vcpu);
 	preempt_enable();
 
 	return 0;
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 6b60c98a6e22..2e43f9d42bd5 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1326,7 +1326,7 @@ static int init_subsystems(void)
 	/*
 	 * Init HYP architected timer support
 	 */
-	err = kvm_timer_hyp_init();
+	err = kvm_timer_hyp_init(vgic_present);
 	if (err)
 		goto out;
 
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index b6e715fd3c90..dac7ceb1a677 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	}
 
 	trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
-		       data);
+		       &data);
 	data = vcpu_data_host_to_guest(vcpu, data, len);
 	vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
 }
@@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
 					       len);
 
-		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
 		kvm_mmio_write_buf(data_buf, len, data);
 
 		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
 				       data_buf);
 	} else {
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
-			       fault_ipa, 0);
+			       fault_ipa, NULL);
 
 		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
 				      data_buf);
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index b36945d49986..b4b69c2d1012 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -509,8 +509,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
  */
 void free_hyp_pgds(void)
 {
-	unsigned long addr;
-
 	mutex_lock(&kvm_hyp_pgd_mutex);
 
 	if (boot_hyp_pgd) {
@@ -521,10 +519,10 @@ void free_hyp_pgds(void)
 
 	if (hyp_pgd) {
 		unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
-		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
+		unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
+				(uintptr_t)high_memory - PAGE_OFFSET);
+		unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
+				VMALLOC_END - VMALLOC_START);
 
 		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;