aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-01-02 08:46:35 -0500
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-01-02 08:46:35 -0500
commitb6a09416e83ffe4eccfb4ef1b91b3b66483fa810 (patch)
treeb30f266e85047244dcdb47d5afc134e76aec530d
parentdb809859c8cee415293b830e67178f526d1eb2be (diff)
parent30a7acd573899fd8b8ac39236eff6468b195ac7d (diff)
Merge 4.15-rc6 into char-misc-next
We want the fixes in here as well. Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--Documentation/admin-guide/kernel-parameters.rst1
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt18
-rw-r--r--Documentation/admin-guide/thunderbolt.rst2
-rw-r--r--Documentation/arm64/silicon-errata.txt1
-rw-r--r--Documentation/cgroup-v2.txt7
-rw-r--r--Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/da7218.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/da7219.txt2
-rw-r--r--Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt18
-rw-r--r--Documentation/filesystems/overlayfs.txt34
-rw-r--r--Documentation/locking/crossrelease.txt874
-rw-r--r--Documentation/vm/zswap.txt22
-rw-r--r--Documentation/x86/x86_64/mm.txt29
-rw-r--r--MAINTAINERS20
-rw-r--r--Makefile5
-rw-r--r--arch/arm/boot/dts/vf610-zii-dev-rev-c.dts4
-rw-r--r--arch/arm/lib/csumpartialcopyuser.S4
-rw-r--r--arch/arm64/Kconfig12
-rw-r--r--arch/arm64/include/asm/assembler.h10
-rw-r--r--arch/arm64/include/asm/cpufeature.h3
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/pgtable.h41
-rw-r--r--arch/arm64/kernel/cpu-reset.S1
-rw-r--r--arch/arm64/kernel/cpufeature.c3
-rw-r--r--arch/arm64/kernel/efi-entry.S2
-rw-r--r--arch/arm64/kernel/fpsimd.c2
-rw-r--r--arch/arm64/kernel/head.S1
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c2
-rw-r--r--arch/arm64/kernel/relocate_kernel.S1
-rw-r--r--arch/arm64/kvm/hyp-init.S1
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c3
-rw-r--r--arch/arm64/mm/dump.c2
-rw-r--r--arch/arm64/mm/fault.c5
-rw-r--r--arch/arm64/mm/init.c3
-rw-r--r--arch/parisc/boot/compressed/misc.c4
-rw-r--r--arch/parisc/include/asm/thread_info.h5
-rw-r--r--arch/parisc/kernel/entry.S12
-rw-r--r--arch/parisc/kernel/hpmc.S1
-rw-r--r--arch/parisc/kernel/unwind.c1
-rw-r--r--arch/parisc/lib/delay.c2
-rw-r--r--arch/powerpc/include/asm/mmu_context.h5
-rw-r--r--arch/powerpc/kernel/process.c2
-rw-r--r--arch/powerpc/kvm/book3s_xive.c7
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c6
-rw-r--r--arch/powerpc/perf/core-book3s.c8
-rw-r--r--arch/powerpc/perf/imc-pmu.c17
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c4
-rw-r--r--arch/riscv/include/asm/barrier.h19
-rw-r--r--arch/riscv/kernel/setup.c11
-rw-r--r--arch/riscv/kernel/sys_riscv.c2
-rw-r--r--arch/s390/include/asm/pgtable.h6
-rw-r--r--arch/s390/kernel/compat_linux.c1
-rw-r--r--arch/s390/net/bpf_jit_comp.c11
-rw-r--r--arch/sparc/lib/hweight.S4
-rw-r--r--arch/sparc/mm/fault_32.c2
-rw-r--r--arch/sparc/mm/fault_64.c2
-rw-r--r--arch/sparc/mm/gup.c4
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c6
-rw-r--r--arch/um/include/asm/Kbuild1
-rw-r--r--arch/um/include/asm/mmu_context.h3
-rw-r--r--arch/um/kernel/trap.c2
-rw-r--r--arch/unicore32/include/asm/mmu_context.h5
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/Kconfig.debug1
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/boot/compressed/head_64.S16
-rw-r--r--arch/x86/boot/compressed/misc.c16
-rw-r--r--arch/x86/boot/compressed/pagetable.c3
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c28
-rw-r--r--arch/x86/boot/genimage.sh32
-rw-r--r--arch/x86/crypto/salsa20_glue.c7
-rw-r--r--arch/x86/entry/calling.h145
-rw-r--r--arch/x86/entry/entry_32.S14
-rw-r--r--arch/x86/entry/entry_64.S237
-rw-r--r--arch/x86/entry/entry_64_compat.S31
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c38
-rw-r--r--arch/x86/events/intel/core.c5
-rw-r--r--arch/x86/events/intel/ds.c130
-rw-r--r--arch/x86/events/perf_event.h23
-rw-r--r--arch/x86/include/asm/asm.h2
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h81
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h4
-rw-r--r--arch/x86/include/asm/desc.h14
-rw-r--r--arch/x86/include/asm/disabled-features.h8
-rw-r--r--arch/x86/include/asm/espfix.h7
-rw-r--r--arch/x86/include/asm/fixmap.h7
-rw-r--r--arch/x86/include/asm/hypervisor.h25
-rw-r--r--arch/x86/include/asm/intel_ds.h36
-rw-r--r--arch/x86/include/asm/invpcid.h53
-rw-r--r--arch/x86/include/asm/irqdomain.h2
-rw-r--r--arch/x86/include/asm/irqflags.h3
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/mmu.h4
-rw-r--r--arch/x86/include/asm/mmu_context.h113
-rw-r--r--arch/x86/include/asm/paravirt.h9
-rw-r--r--arch/x86/include/asm/pgalloc.h11
-rw-r--r--arch/x86/include/asm/pgtable.h30
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h15
-rw-r--r--arch/x86/include/asm/pgtable_64.h92
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h51
-rw-r--r--arch/x86/include/asm/processor-flags.h5
-rw-r--r--arch/x86/include/asm/processor.h82
-rw-r--r--arch/x86/include/asm/pti.h14
-rw-r--r--arch/x86/include/asm/stacktrace.h3
-rw-r--r--arch/x86/include/asm/suspend_32.h8
-rw-r--r--arch/x86/include/asm/suspend_64.h19
-rw-r--r--arch/x86/include/asm/switch_to.h13
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/tlbflush.h312
-rw-r--r--arch/x86/include/asm/trace/irq_vectors.h16
-rw-r--r--arch/x86/include/asm/traps.h1
-rw-r--r--arch/x86/include/asm/unwind.h7
-rw-r--r--arch/x86/include/asm/vsyscall.h1
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h7
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/apic_noop.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c2
-rw-r--r--arch/x86/kernel/apic/msi.c8
-rw-r--r--arch/x86/kernel/apic/probe_32.c2
-rw-r--r--arch/x86/kernel/apic/vector.c20
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c10
-rw-r--r--arch/x86/kernel/asm-offsets_32.c9
-rw-r--r--arch/x86/kernel/asm-offsets_64.c4
-rw-r--r--arch/x86/kernel/cpu/common.c103
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c13
-rw-r--r--arch/x86/kernel/doublefault.c36
-rw-r--r--arch/x86/kernel/dumpstack.c81
-rw-r--r--arch/x86/kernel/dumpstack_32.c6
-rw-r--r--arch/x86/kernel/dumpstack_64.c12
-rw-r--r--arch/x86/kernel/head_64.S30
-rw-r--r--arch/x86/kernel/ioport.c2
-rw-r--r--arch/x86/kernel/irq.c12
-rw-r--r--arch/x86/kernel/irq_64.c4
-rw-r--r--arch/x86/kernel/ldt.c198
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c2
-rw-r--r--arch/x86/kernel/process.c19
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c14
-rw-r--r--arch/x86/kernel/smpboot.c19
-rw-r--r--arch/x86/kernel/stacktrace.c6
-rw-r--r--arch/x86/kernel/tls.c11
-rw-r--r--arch/x86/kernel/traps.c77
-rw-r--r--arch/x86/kernel/unwind_orc.c88
-rw-r--r--arch/x86/kernel/vmlinux.lds.S17
-rw-r--r--arch/x86/kvm/emulate.c32
-rw-r--r--arch/x86/kvm/mmu.c8
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c48
-rw-r--r--arch/x86/lib/delay.c4
-rw-r--r--arch/x86/lib/x86-opcode-map.txt13
-rw-r--r--arch/x86/mm/Makefile9
-rw-r--r--arch/x86/mm/cpu_entry_area.c166
-rw-r--r--arch/x86/mm/debug_pagetables.c80
-rw-r--r--arch/x86/mm/dump_pagetables.c141
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/init.c80
-rw-r--r--arch/x86/mm/init_32.c6
-rw-r--r--arch/x86/mm/ioremap.c4
-rw-r--r--arch/x86/mm/kasan_init_64.c23
-rw-r--r--arch/x86/mm/kmmio.c12
-rw-r--r--arch/x86/mm/mem_encrypt.c4
-rw-r--r--arch/x86/mm/pgtable.c5
-rw-r--r--arch/x86/mm/pgtable_32.c1
-rw-r--r--arch/x86/mm/pti.c387
-rw-r--r--arch/x86/mm/tlb.c64
-rw-r--r--arch/x86/pci/fixup.c27
-rw-r--r--arch/x86/platform/efi/efi_64.c5
-rw-r--r--arch/x86/platform/uv/tlb_uv.c2
-rw-r--r--arch/x86/platform/uv/uv_irq.c2
-rw-r--r--arch/x86/power/cpu.c115
-rw-r--r--arch/x86/xen/apic.c2
-rw-r--r--arch/x86/xen/enlighten.c81
-rw-r--r--arch/x86/xen/enlighten_pv.c5
-rw-r--r--arch/x86/xen/mmu_pv.c14
-rw-r--r--arch/x86/xen/setup.c6
-rw-r--r--block/bio.c2
-rw-r--r--block/blk-map.c38
-rw-r--r--block/blk-throttle.c8
-rw-r--r--block/bounce.c6
-rw-r--r--block/kyber-iosched.c37
-rw-r--r--crypto/af_alg.c19
-rw-r--r--crypto/algif_aead.c18
-rw-r--r--crypto/algif_skcipher.c16
-rw-r--r--crypto/hmac.c6
-rw-r--r--crypto/mcryptd.c23
-rw-r--r--crypto/rsa_helper.c2
-rw-r--r--crypto/salsa20_generic.c7
-rw-r--r--crypto/shash.c5
-rw-r--r--crypto/skcipher.c10
-rw-r--r--drivers/acpi/apei/erst.c2
-rw-r--r--drivers/acpi/cppc_acpi.c2
-rw-r--r--drivers/acpi/device_pm.c2
-rw-r--r--drivers/acpi/nfit/core.c9
-rw-r--r--drivers/android/binder.c44
-rw-r--r--drivers/ata/ahci_mtk.c6
-rw-r--r--drivers/ata/ahci_qoriq.c12
-rw-r--r--drivers/ata/libata-core.c12
-rw-r--r--drivers/ata/pata_pdc2027x.c16
-rw-r--r--drivers/base/cacheinfo.c13
-rw-r--r--drivers/base/power/main.c15
-rw-r--r--drivers/block/null_blk.c4
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c44
-rw-r--r--drivers/char/ipmi/ipmi_si_parisc.c2
-rw-r--r--drivers/char/ipmi/ipmi_si_pci.c7
-rw-r--r--drivers/clk/clk.c8
-rw-r--r--drivers/clk/sunxi/clk-sun9i-mmc.c12
-rw-r--r--drivers/cpufreq/cpufreq_governor.c19
-rw-r--r--drivers/cpufreq/imx6q-cpufreq.c11
-rw-r--r--drivers/dma/at_hdmac.c4
-rw-r--r--drivers/dma/dma-jz4740.c4
-rw-r--r--drivers/dma/dmatest.c55
-rw-r--r--drivers/dma/fsl-edma.c28
-rw-r--r--drivers/dma/ioat/init.c2
-rw-r--r--drivers/gpio/gpio-bcm-kona.c3
-rw-r--r--drivers/gpio/gpio-brcmstb.c4
-rw-r--r--drivers/gpio/gpio-reg.c4
-rw-r--r--drivers/gpio/gpio-tegra.c4
-rw-r--r--drivers/gpio/gpio-xgene-sb.c2
-rw-r--r--drivers/gpio/gpiolib-acpi.c2
-rw-r--r--drivers/gpio/gpiolib-devprop.c17
-rw-r--r--drivers/gpio/gpiolib-of.c3
-rw-r--r--drivers/gpio/gpiolib.c27
-rw-r--r--drivers/gpio/gpiolib.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c13
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c51
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c26
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c9
-rw-r--r--drivers/gpu/drm/drm_connector.c63
-rw-r--r--drivers/gpu/drm/drm_crtc_internal.h1
-rw-r--r--drivers/gpu/drm/drm_edid.c52
-rw-r--r--drivers/gpu/drm/drm_lease.c26
-rw-r--r--drivers/gpu/drm/drm_mm.c8
-rw-r--r--drivers/gpu/drm/drm_mode_config.c5
-rw-r--r--drivers/gpu/drm/drm_plane.c42
-rw-r--r--drivers/gpu/drm/drm_syncobj.c77
-rw-r--r--drivers/gpu/drm/i915/gvt/display.c5
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c9
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence.c3
-rw-r--r--drivers/gpu/drm/i915/intel_breadcrumbs.c22
-rw-r--r--drivers/gpu/drm/i915/intel_ddi.c4
-rw-r--r--drivers/gpu/drm/i915/intel_display.c5
-rw-r--r--drivers/gpu/drm/i915/intel_lpe_audio.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c5
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.h11
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fbcon.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mem.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ttm.c39
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vmm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/base.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c7
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c20
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon.c4
-rw-r--r--drivers/gpu/drm/ttm/ttm_page_alloc.c3
-rw-r--r--drivers/gpu/drm/vc4/vc4_gem.c4
-rw-r--r--drivers/gpu/drm/vc4/vc4_irq.c1
-rw-r--r--drivers/hid/hid-core.c2
-rw-r--r--drivers/hid/hid-cp2112.c15
-rw-r--r--drivers/hid/hid-holtekff.c8
-rw-r--r--drivers/hv/vmbus_drv.c2
-rw-r--r--drivers/hwmon/hwmon.c21
-rw-r--r--drivers/hwtracing/stm/ftrace.c6
-rw-r--r--drivers/i2c/busses/i2c-cht-wc.c2
-rw-r--r--drivers/i2c/busses/i2c-piix4.c2
-rw-r--r--drivers/i2c/busses/i2c-stm32.h3
-rw-r--r--drivers/i2c/busses/i2c-stm32f4.c3
-rw-r--r--drivers/i2c/busses/i2c-stm32f7.c3
-rw-r--r--drivers/infiniband/core/cma.c2
-rw-r--r--drivers/infiniband/core/device.c2
-rw-r--r--drivers/infiniband/core/iwcm.c2
-rw-r--r--drivers/infiniband/core/nldev.c2
-rw-r--r--drivers/infiniband/core/security.c10
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c10
-rw-r--r--drivers/infiniband/core/verbs.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c16
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h2
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c94
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h6
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h1
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c30
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c26
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c11
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h2
-rw-r--r--drivers/infiniband/hw/mlx5/main.c43
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h4
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c1
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma.h6
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c7
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c17
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c14
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c7
-rw-r--r--drivers/iommu/amd_iommu.c2
-rw-r--r--drivers/iommu/intel_irq_remapping.c2
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c4
-rw-r--r--drivers/irqchip/irq-renesas-intc-irqpin.c6
-rw-r--r--drivers/leds/led-core.c2
-rw-r--r--drivers/md/dm-bufio.c8
-rw-r--r--drivers/md/dm-cache-target.c12
-rw-r--r--drivers/md/dm-mpath.c67
-rw-r--r--drivers/md/dm-snap.c48
-rw-r--r--drivers/md/dm-table.c5
-rw-r--r--drivers/md/dm-thin.c22
-rw-r--r--drivers/mfd/arizona-irq.c4
-rw-r--r--drivers/mfd/cros_ec_spi.c53
-rw-r--r--drivers/mfd/twl4030-audio.c9
-rw-r--r--drivers/mfd/twl6040.c12
-rw-r--r--drivers/misc/eeprom/at24.c26
-rw-r--r--drivers/misc/pti.c2
-rw-r--r--drivers/mmc/core/card.h2
-rw-r--r--drivers/mmc/core/mmc.c2
-rw-r--r--drivers/mmc/core/quirks.h8
-rw-r--r--drivers/mtd/mtdcore.c2
-rw-r--r--drivers/mtd/nand/brcmnand/brcmnand.c2
-rw-r--r--drivers/mtd/nand/gpio.c6
-rw-r--r--drivers/mtd/nand/gpmi-nand/gpmi-nand.c6
-rw-r--r--drivers/net/dsa/mv88e6xxx/port.c1
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_cfg.h5
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c16
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_hw.h29
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_nic.c82
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_nic.h2
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c5
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c17
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c29
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h6
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c80
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h6
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/ver.h6
-rw-r--r--drivers/net/ethernet/arc/emac.h2
-rw-r--r--drivers/net/ethernet/arc/emac_main.c164
-rw-r--r--drivers/net/ethernet/arc/emac_rockchip.c10
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c14
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c21
-rw-r--r--drivers/net/ethernet/broadcom/tg3.h7
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c6
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c8
-rw-r--r--drivers/net/ethernet/marvell/skge.c1
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_port.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_selftest.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c56
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c75
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rl.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vxlan.c64
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vxlan.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c15
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.c55
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h8
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac-phy.c7
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac.c6
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c27
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/enh_desc.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/norm_desc.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c2
-rw-r--r--drivers/net/hippi/rrunner.c2
-rw-r--r--drivers/net/phy/at803x.c4
-rw-r--r--drivers/net/phy/marvell.c18
-rw-r--r--drivers/net/phy/mdio-xgene.c21
-rw-r--r--drivers/net/phy/mdio_bus.c1
-rw-r--r--drivers/net/phy/meson-gxl.c74
-rw-r--r--drivers/net/phy/micrel.c1
-rw-r--r--drivers/net/phy/phy.c9
-rw-r--r--drivers/net/phy/phy_device.c10
-rw-r--r--drivers/net/phy/phylink.c2
-rw-r--r--drivers/net/usb/qmi_wwan.c2
-rw-r--r--drivers/net/vxlan.c19
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c3
-rw-r--r--drivers/nvdimm/btt.c201
-rw-r--r--drivers/nvdimm/btt.h47
-rw-r--r--drivers/nvdimm/pfn_devs.c20
-rw-r--r--drivers/nvme/host/core.c11
-rw-r--r--drivers/nvme/host/fc.c1
-rw-r--r--drivers/nvmem/meson-mx-efuse.c4
-rw-r--r--drivers/of/of_mdio.c3
-rw-r--r--drivers/parisc/lba_pci.c33
-rw-r--r--drivers/pci/host/pci-hyperv.c8
-rw-r--r--drivers/pci/host/pcie-rcar.c8
-rw-r--r--drivers/pci/pci-driver.c9
-rw-r--r--drivers/phy/motorola/phy-cpcap-usb.c2
-rw-r--r--drivers/phy/renesas/Kconfig2
-rw-r--r--drivers/phy/rockchip/phy-rockchip-typec.c2
-rw-r--r--drivers/phy/tegra/xusb.c58
-rw-r--r--drivers/pinctrl/intel/pinctrl-cherryview.c16
-rw-r--r--drivers/pinctrl/pinctrl-single.c5
-rw-r--r--drivers/pinctrl/stm32/pinctrl-stm32.c2
-rw-r--r--drivers/platform/x86/asus-wireless.c1
-rw-r--r--drivers/platform/x86/dell-laptop.c17
-rw-r--r--drivers/platform/x86/dell-wmi.c2
-rw-r--r--drivers/s390/net/qeth_core.h6
-rw-r--r--drivers/s390/net/qeth_core_main.c15
-rw-r--r--drivers/s390/net/qeth_l3.h2
-rw-r--r--drivers/s390/net/qeth_l3_main.c36
-rw-r--r--drivers/s390/net/qeth_l3_sys.c75
-rw-r--r--drivers/scsi/aacraid/aacraid.h1
-rw-r--r--drivers/scsi/aacraid/commsup.c8
-rw-r--r--drivers/scsi/aacraid/linit.c2
-rw-r--r--drivers/scsi/bfa/bfad_bsg.c6
-rw-r--r--drivers/scsi/bfa/bfad_im.c6
-rw-r--r--drivers/scsi/bfa/bfad_im.h10
-rw-r--r--drivers/scsi/libfc/fc_lport.c4
-rw-r--r--drivers/scsi/libsas/sas_expander.c10
-rw-r--r--drivers/scsi/lpfc/lpfc_mem.c2
-rw-r--r--drivers/scsi/osd/osd_initiator.c4
-rw-r--r--drivers/scsi/scsi_debugfs.c6
-rw-r--r--drivers/scsi/scsi_devinfo.c33
-rw-r--r--drivers/scsi/scsi_lib.c2
-rw-r--r--drivers/scsi/scsi_scan.c13
-rw-r--r--drivers/scsi/scsi_sysfs.c10
-rw-r--r--drivers/scsi/scsi_transport_spi.c12
-rw-r--r--drivers/scsi/sd.c4
-rw-r--r--drivers/scsi/storvsc_drv.c3
-rw-r--r--drivers/spi/spi-armada-3700.c8
-rw-r--r--drivers/spi/spi-atmel.c2
-rw-r--r--drivers/spi/spi-rspi.c4
-rw-r--r--drivers/spi/spi-sun4i.c2
-rw-r--r--drivers/spi/spi-xilinx.c11
-rw-r--r--drivers/staging/android/ion/Kconfig2
-rw-r--r--drivers/staging/android/ion/ion.c4
-rw-r--r--drivers/staging/android/ion/ion_cma_heap.c15
-rw-r--r--drivers/staging/ccree/ssi_hash.c2
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c23
-rw-r--r--drivers/staging/pi433/rf69.c2
-rw-r--r--drivers/target/target_core_pscsi.c4
-rw-r--r--drivers/thunderbolt/nhi.c2
-rw-r--r--drivers/tty/n_tty.c4
-rw-r--r--drivers/usb/chipidea/ci_hdrc_msm.c2
-rw-r--r--drivers/usb/core/config.c6
-rw-r--r--drivers/usb/core/quirks.c6
-rw-r--r--drivers/usb/dwc2/core.h4
-rw-r--r--drivers/usb/dwc2/gadget.c42
-rw-r--r--drivers/usb/dwc2/params.c29
-rw-r--r--drivers/usb/dwc3/dwc3-of-simple.c5
-rw-r--r--drivers/usb/dwc3/gadget.c4
-rw-r--r--drivers/usb/gadget/Kconfig4
-rw-r--r--drivers/usb/gadget/legacy/Kconfig12
-rw-r--r--drivers/usb/host/xhci-debugfs.c16
-rw-r--r--drivers/usb/host/xhci-mem.c15
-rw-r--r--drivers/usb/host/xhci-pci.c3
-rw-r--r--drivers/usb/host/xhci-ring.c6
-rw-r--r--drivers/usb/host/xhci.c6
-rw-r--r--drivers/usb/musb/da8xx.c10
-rw-r--r--drivers/usb/serial/ftdi_sio.c1
-rw-r--r--drivers/usb/serial/ftdi_sio_ids.h6
-rw-r--r--drivers/usb/serial/option.c17
-rw-r--r--drivers/usb/serial/qcserial.c3
-rw-r--r--drivers/usb/storage/unusual_devs.h7
-rw-r--r--drivers/usb/storage/unusual_uas.h7
-rw-r--r--drivers/usb/usbip/stub_dev.c3
-rw-r--r--drivers/usb/usbip/stub_main.c5
-rw-r--r--drivers/usb/usbip/stub_rx.c47
-rw-r--r--drivers/usb/usbip/stub_tx.c13
-rw-r--r--drivers/usb/usbip/usbip_common.c16
-rw-r--r--drivers/usb/usbip/usbip_common.h1
-rw-r--r--drivers/usb/usbip/vhci_hcd.c12
-rw-r--r--drivers/usb/usbip/vhci_rx.c23
-rw-r--r--drivers/usb/usbip/vhci_sysfs.c25
-rw-r--r--drivers/usb/usbip/vhci_tx.c3
-rw-r--r--drivers/virtio/virtio_mmio.c43
-rw-r--r--drivers/xen/Kconfig2
-rw-r--r--drivers/xen/balloon.c65
-rw-r--r--fs/autofs4/waitq.c1
-rw-r--r--fs/ceph/mds_client.c42
-rw-r--r--fs/cifs/smb2ops.c3
-rw-r--r--fs/cifs/smb2pdu.c30
-rw-r--r--fs/cramfs/Kconfig1
-rw-r--r--fs/dax.c3
-rw-r--r--fs/exec.c14
-rw-r--r--fs/ext4/extents.c1
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c9
-rw-r--r--fs/ext4/namei.c4
-rw-r--r--fs/namespace.c1
-rw-r--r--fs/nfs/client.c11
-rw-r--r--fs/nfs/nfs4client.c17
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsd/auth.c3
-rw-r--r--fs/overlayfs/Kconfig10
-rw-r--r--fs/overlayfs/dir.c3
-rw-r--r--fs/overlayfs/namei.c18
-rw-r--r--fs/overlayfs/overlayfs.h2
-rw-r--r--fs/overlayfs/ovl_entry.h2
-rw-r--r--fs/overlayfs/readdir.c7
-rw-r--r--fs/overlayfs/super.c87
-rw-r--r--fs/super.c37
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c4
-rw-r--r--fs/xfs/libxfs/xfs_attr.c20
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c9
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h3
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c2
-rw-r--r--fs/xfs/libxfs/xfs_defer.c39
-rw-r--r--fs/xfs/libxfs/xfs_defer.h5
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c10
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h1
-rw-r--r--fs/xfs/libxfs/xfs_iext_tree.c4
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c52
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c99
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h16
-rw-r--r--fs/xfs/scrub/scrub.c1
-rw-r--r--fs/xfs/scrub/trace.c1
-rw-r--r--fs/xfs/xfs_extfree_item.c2
-rw-r--r--fs/xfs/xfs_fsops.c5
-rw-r--r--fs/xfs/xfs_icache.c35
-rw-r--r--fs/xfs/xfs_icache.h1
-rw-r--r--fs/xfs/xfs_inode.c61
-rw-r--r--fs/xfs/xfs_inode.h3
-rw-r--r--fs/xfs/xfs_iomap.c2
-rw-r--r--fs/xfs/xfs_qm.c4
-rw-r--r--fs/xfs/xfs_reflink.c23
-rw-r--r--fs/xfs/xfs_super.c9
-rw-r--r--fs/xfs/xfs_symlink.c15
-rw-r--r--fs/xfs/xfs_trace.c1
-rw-r--r--include/asm-generic/mm_hooks.h5
-rw-r--r--include/asm-generic/pgtable.h5
-rw-r--r--include/crypto/internal/hash.h8
-rw-r--r--include/crypto/mcryptd.h1
-rw-r--r--include/drm/drm_connector.h10
-rw-r--r--include/drm/drm_edid.h2
-rw-r--r--include/drm/drm_mode_config.h18
-rw-r--r--include/kvm/arm_arch_timer.h2
-rw-r--r--include/linux/bio.h2
-rw-r--r--include/linux/blk_types.h9
-rw-r--r--include/linux/blkdev.h25
-rw-r--r--include/linux/bpf_verifier.h4
-rw-r--r--include/linux/compiler.h47
-rw-r--r--include/linux/completion.h45
-rw-r--r--include/linux/cpuhotplug.h2
-rw-r--r--include/linux/cred.h1
-rw-r--r--include/linux/gpio/driver.h33
-rw-r--r--include/linux/idr.h1
-rw-r--r--include/linux/intel-pti.h43
-rw-r--r--include/linux/ipv6.h3
-rw-r--r--include/linux/irq.h17
-rw-r--r--include/linux/irqdesc.h9
-rw-r--r--include/linux/irqdomain.h2
-rw-r--r--include/linux/lockdep.h125
-rw-r--r--include/linux/mfd/rtsx_pci.h2
-rw-r--r--include/linux/mlx5/driver.h7
-rw-r--r--include/linux/mlx5/mlx5_ifc.h8
-rw-r--r--include/linux/oom.h9
-rw-r--r--include/linux/pci.h3
-rw-r--r--include/linux/pm.h1
-rw-r--r--include/linux/pti.h50
-rw-r--r--include/linux/ptr_ring.h9
-rw-r--r--include/linux/rbtree.h2
-rw-r--r--include/linux/rwlock_types.h3
-rw-r--r--include/linux/sched.h17
-rw-r--r--include/linux/sched/coredump.h1
-rw-r--r--include/linux/spi/spi.h2
-rw-r--r--include/linux/spinlock.h5
-rw-r--r--include/linux/spinlock_types.h3
-rw-r--r--include/linux/string.h5
-rw-r--r--include/linux/tick.h1
-rw-r--r--include/linux/timer.h4
-rw-r--r--include/linux/trace.h2
-rw-r--r--include/net/cfg80211.h1
-rw-r--r--include/net/gue.h18
-rw-r--r--include/net/ip.h1
-rw-r--r--include/net/pkt_cls.h5
-rw-r--r--include/net/sch_generic.h1
-rw-r--r--include/net/sock.h5
-rw-r--r--include/net/xfrm.h3
-rw-r--r--include/trace/events/clk.h4
-rw-r--r--include/trace/events/kvm.h7
-rw-r--r--include/trace/events/preemptirq.h11
-rw-r--r--include/trace/events/tcp.h97
-rw-r--r--include/uapi/linux/pkt_sched.h1
-rw-r--r--include/uapi/linux/rtnetlink.h1
-rw-r--r--include/xen/balloon.h5
-rw-r--r--init/Kconfig6
-rw-r--r--init/main.c16
-rw-r--r--kernel/bpf/hashtab.c2
-rw-r--r--kernel/bpf/verifier.c283
-rw-r--r--kernel/cgroup/debug.c4
-rw-r--r--kernel/cgroup/stat.c8
-rw-r--r--kernel/cpu.c12
-rw-r--r--kernel/exit.c8
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/groups.c5
-rw-r--r--kernel/irq/debug.h5
-rw-r--r--kernel/irq/debugfs.c1
-rw-r--r--kernel/irq/generic-chip.c11
-rw-r--r--kernel/irq/internals.h2
-rw-r--r--kernel/irq/irqdomain.c13
-rw-r--r--kernel/irq/msi.c64
-rw-r--r--kernel/kcov.c4
-rw-r--r--kernel/locking/lockdep.c652
-rw-r--r--kernel/locking/spinlock.c13
-rw-r--r--kernel/sched/core.c22
-rw-r--r--kernel/sched/cpufreq_schedutil.c2
-rw-r--r--kernel/sched/rt.c8
-rw-r--r--kernel/time/Kconfig1
-rw-r--r--kernel/time/posix-timers.c29
-rw-r--r--kernel/time/tick-sched.c32
-rw-r--r--kernel/time/timer.c35
-rw-r--r--kernel/trace/Kconfig1
-rw-r--r--kernel/trace/bpf_trace.c19
-rw-r--r--kernel/trace/ring_buffer.c18
-rw-r--r--kernel/trace/trace.c54
-rw-r--r--kernel/trace/trace_stack.c4
-rw-r--r--kernel/uid16.c1
-rw-r--r--kernel/workqueue.c33
-rw-r--r--lib/Kconfig.debug33
-rw-r--r--lib/kobject_uevent.c16
-rw-r--r--lib/rbtree.c10
-rw-r--r--lib/test_bpf.c43
-rw-r--r--lib/timerqueue.c8
-rw-r--r--mm/backing-dev.c5
-rw-r--r--mm/early_ioremap.c2
-rw-r--r--mm/frame_vector.c6
-rw-r--r--mm/gup.c2
-rw-r--r--mm/hmm.c8
-rw-r--r--mm/huge_memory.c6
-rw-r--r--mm/kmemleak.c2
-rw-r--r--mm/memory.c11
-rw-r--r--mm/mmap.c10
-rw-r--r--mm/oom_kill.c4
-rw-r--r--mm/page_alloc.c11
-rw-r--r--mm/percpu.c4
-rw-r--r--mm/slab.c23
-rw-r--r--net/batman-adv/bat_iv_ogm.c4
-rw-r--r--net/batman-adv/bat_v.c2
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/batman-adv/tp_meter.c4
-rw-r--r--net/bridge/br_netlink.c11
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/netprio_cgroup.c1
-rw-r--r--net/core/skbuff.c17
-rw-r--r--net/dsa/slave.c1
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/fib_semantics.c8
-rw-r--r--net/ipv4/igmp.c44
-rw-r--r--net/ipv4/ip_gre.c3
-rw-r--r--net/ipv4/ip_tunnel.c4
-rw-r--r--net/ipv4/netfilter/arp_tables.c1
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c3
-rw-r--r--net/ipv4/raw.c15
-rw-r--r--net/ipv4/tcp_input.c10
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_timer.c2
-rw-r--r--net/ipv4/xfrm4_input.c12
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/ip6_gre.c58
-rw-r--r--net/ipv6/ip6_output.c12
-rw-r--r--net/ipv6/ip6_tunnel.c9
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/mcast.c25
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c8
-rw-r--r--net/ipv6/route.c20
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/ipv6/xfrm6_input.c10
-rw-r--r--net/mac80211/ht.c5
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c128
-rw-r--r--net/netfilter/nf_conntrack_netlink.c13
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c3
-rw-r--r--net/netfilter/nf_tables_api.c7
-rw-r--r--net/netfilter/nfnetlink_cthelper.c10
-rw-r--r--net/netfilter/nfnetlink_log.c5
-rw-r--r--net/netfilter/nfnetlink_queue.c5
-rw-r--r--net/netfilter/nft_exthdr.c2
-rw-r--r--net/netfilter/x_tables.c9
-rw-r--r--net/netfilter/xt_bpf.c6
-rw-r--r--net/netfilter/xt_osf.c7
-rw-r--r--net/netlink/af_netlink.c3
-rw-r--r--net/openvswitch/flow.c15
-rw-r--r--net/rds/send.c3
-rw-r--r--net/sched/act_meta_mark.c1
-rw-r--r--net/sched/act_meta_skbtcindex.c1
-rw-r--r--net/sched/cls_api.c3
-rw-r--r--net/sched/cls_bpf.c93
-rw-r--r--net/sched/cls_u32.c1
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/sched/sch_generic.c4
-rw-r--r--net/sched/sch_ingress.c15
-rw-r--r--net/sched/sch_red.c31
-rw-r--r--net/sctp/debug.c3
-rw-r--r--net/sctp/socket.c10
-rw-r--r--net/sctp/ulpqueue.c24
-rw-r--r--net/strparser/strparser.c2
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c1
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c1
-rw-r--r--net/sunrpc/svcauth_unix.c2
-rw-r--r--net/sunrpc/xprt.c28
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c6
-rw-r--r--net/sunrpc/xprtrdma/transport.c2
-rw-r--r--net/sunrpc/xprtrdma/verbs.c2
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h1
-rw-r--r--net/tipc/bearer.c5
-rw-r--r--net/tipc/group.c47
-rw-r--r--net/tipc/monitor.c6
-rw-r--r--net/tipc/socket.c4
-rw-r--r--net/wireless/Makefile39
-rw-r--r--net/wireless/certs/sforshee.hex86
-rw-r--r--net/wireless/certs/sforshee.x509bin680 -> 0 bytes
-rw-r--r--net/wireless/nl80211.c6
-rw-r--r--net/xfrm/xfrm_input.c69
-rw-r--r--net/xfrm/xfrm_policy.c9
-rw-r--r--net/xfrm/xfrm_state.c1
-rw-r--r--net/xfrm/xfrm_user.c26
-rwxr-xr-xscripts/checkpatch.pl22
-rwxr-xr-xscripts/faddr2line8
-rw-r--r--security/Kconfig10
-rw-r--r--sound/core/rawmidi.c15
-rw-r--r--sound/hda/hdac_i915.c2
-rw-r--r--sound/pci/hda/patch_conexant.c29
-rw-r--r--sound/pci/hda/patch_hdmi.c6
-rw-r--r--sound/pci/hda/patch_realtek.c49
-rw-r--r--sound/soc/amd/acp-pcm-dma.c7
-rw-r--r--sound/soc/atmel/Kconfig2
-rw-r--r--sound/soc/codecs/da7218.c2
-rw-r--r--sound/soc/codecs/msm8916-wcd-analog.c2
-rw-r--r--sound/soc/codecs/msm8916-wcd-digital.c4
-rw-r--r--sound/soc/codecs/nau8825.c1
-rw-r--r--sound/soc/codecs/rt5514-spi.c15
-rw-r--r--sound/soc/codecs/rt5514.c2
-rw-r--r--sound/soc/codecs/rt5645.c2
-rw-r--r--sound/soc/codecs/rt5663.c4
-rw-r--r--sound/soc/codecs/rt5663.h4
-rw-r--r--sound/soc/codecs/tlv320aic31xx.h2
-rw-r--r--sound/soc/codecs/twl4030.c4
-rw-r--r--sound/soc/codecs/wm_adsp.c12
-rw-r--r--sound/soc/fsl/fsl_asrc.h4
-rw-r--r--sound/soc/fsl/fsl_ssi.c44
-rw-r--r--sound/soc/intel/boards/kbl_rt5663_max98927.c2
-rw-r--r--sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c2
-rw-r--r--sound/soc/intel/skylake/skl-nhlt.c15
-rw-r--r--sound/soc/intel/skylake/skl-topology.c2
-rw-r--r--sound/soc/rockchip/rockchip_spdif.c18
-rw-r--r--sound/soc/sh/rcar/adg.c6
-rw-r--r--sound/soc/sh/rcar/core.c4
-rw-r--r--sound/soc/sh/rcar/dma.c86
-rw-r--r--sound/soc/sh/rcar/ssi.c16
-rw-r--r--sound/soc/sh/rcar/ssiu.c5
-rw-r--r--sound/usb/mixer.c27
-rw-r--r--sound/usb/quirks.c7
-rw-r--r--tools/arch/s390/include/uapi/asm/bpf_perf_event.h2
-rw-r--r--tools/arch/s390/include/uapi/asm/perf_regs.h44
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h1
-rw-r--r--tools/bpf/bpftool/map.c8
-rw-r--r--tools/bpf/bpftool/prog.c2
-rw-r--r--tools/include/linux/compiler.h21
-rw-r--r--tools/include/linux/lockdep.h1
-rw-r--r--tools/include/uapi/asm/bpf_perf_event.h7
-rw-r--r--tools/include/uapi/linux/kvm.h4
-rwxr-xr-xtools/kvm/kvm_stat/kvm_stat74
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.txt4
-rw-r--r--tools/objtool/arch/x86/decode.c2
-rw-r--r--tools/objtool/arch/x86/lib/x86-opcode-map.txt15
-rw-r--r--tools/objtool/builtin-orc.c4
-rw-r--r--tools/objtool/orc_gen.c2
-rw-r--r--tools/perf/Makefile.config9
-rw-r--r--tools/perf/arch/s390/include/perf_regs.h2
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/jvmti/jvmti_agent.c16
-rw-r--r--tools/perf/jvmti/jvmti_agent.h7
-rw-r--r--tools/perf/jvmti/libjvmti.c147
-rw-r--r--tools/perf/util/intel-pt-decoder/x86-opcode-map.txt13
-rw-r--r--tools/perf/util/mmap.h2
-rw-r--r--tools/testing/selftests/bpf/Makefile17
-rw-r--r--tools/testing/selftests/bpf/test_progs.c8
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c629
-rw-r--r--tools/testing/selftests/net/config1
-rw-r--r--tools/testing/selftests/x86/ldt_gdt.c12
-rw-r--r--tools/usb/usbip/libsrc/vhci_driver.c10
-rw-r--r--tools/usb/usbip/src/utils.c9
-rw-r--r--tools/virtio/ringtest/ptr_ring.c29
-rw-r--r--tools/vm/slabinfo-gnuplot.sh2
-rw-r--r--virt/kvm/arm/arch_timer.c40
-rw-r--r--virt/kvm/arm/arm.c2
-rw-r--r--virt/kvm/arm/mmio.c6
-rw-r--r--virt/kvm/arm/mmu.c10
806 files changed, 9729 insertions, 5644 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index b2598cc9834c..7242cbda15dd 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -109,6 +109,7 @@ parameter is applicable::
109 IPV6 IPv6 support is enabled. 109 IPV6 IPv6 support is enabled.
110 ISAPNP ISA PnP code is enabled. 110 ISAPNP ISA PnP code is enabled.
111 ISDN Appropriate ISDN support is enabled. 111 ISDN Appropriate ISDN support is enabled.
112 ISOL CPU Isolation is enabled.
112 JOY Appropriate joystick support is enabled. 113 JOY Appropriate joystick support is enabled.
113 KGDB Kernel debugger support is enabled. 114 KGDB Kernel debugger support is enabled.
114 KVM Kernel Virtual Machine support is enabled. 115 KVM Kernel Virtual Machine support is enabled.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6571fbfdb2a1..af7104aaffd9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -328,11 +328,15 @@
328 not play well with APC CPU idle - disable it if you have 328 not play well with APC CPU idle - disable it if you have
329 APC and your system crashes randomly. 329 APC and your system crashes randomly.
330 330
331 apic= [APIC,X86-32] Advanced Programmable Interrupt Controller 331 apic= [APIC,X86] Advanced Programmable Interrupt Controller
332 Change the output verbosity whilst booting 332 Change the output verbosity whilst booting
333 Format: { quiet (default) | verbose | debug } 333 Format: { quiet (default) | verbose | debug }
334 Change the amount of debugging information output 334 Change the amount of debugging information output
335 when initialising the APIC and IO-APIC components. 335 when initialising the APIC and IO-APIC components.
336 For X86-32, this can also be used to specify an APIC
337 driver name.
338 Format: apic=driver_name
339 Examples: apic=bigsmp
336 340
337 apic_extnmi= [APIC,X86] External NMI delivery setting 341 apic_extnmi= [APIC,X86] External NMI delivery setting
338 Format: { bsp (default) | all | none } 342 Format: { bsp (default) | all | none }
@@ -1737,7 +1741,7 @@
1737 isapnp= [ISAPNP] 1741 isapnp= [ISAPNP]
1738 Format: <RDP>,<reset>,<pci_scan>,<verbosity> 1742 Format: <RDP>,<reset>,<pci_scan>,<verbosity>
1739 1743
1740 isolcpus= [KNL,SMP] Isolate a given set of CPUs from disturbance. 1744 isolcpus= [KNL,SMP,ISOL] Isolate a given set of CPUs from disturbance.
1741 [Deprecated - use cpusets instead] 1745 [Deprecated - use cpusets instead]
1742 Format: [flag-list,]<cpu-list> 1746 Format: [flag-list,]<cpu-list>
1743 1747
@@ -2662,7 +2666,7 @@
2662 Valid arguments: on, off 2666 Valid arguments: on, off
2663 Default: on 2667 Default: on
2664 2668
2665 nohz_full= [KNL,BOOT] 2669 nohz_full= [KNL,BOOT,SMP,ISOL]
2666 The argument is a cpu list, as described above. 2670 The argument is a cpu list, as described above.
2667 In kernels built with CONFIG_NO_HZ_FULL=y, set 2671 In kernels built with CONFIG_NO_HZ_FULL=y, set
2668 the specified list of CPUs whose tick will be stopped 2672 the specified list of CPUs whose tick will be stopped
@@ -2708,6 +2712,8 @@
2708 steal time is computed, but won't influence scheduler 2712 steal time is computed, but won't influence scheduler
2709 behaviour 2713 behaviour
2710 2714
2715 nopti [X86-64] Disable kernel page table isolation
2716
2711 nolapic [X86-32,APIC] Do not enable or use the local APIC. 2717 nolapic [X86-32,APIC] Do not enable or use the local APIC.
2712 2718
2713 nolapic_timer [X86-32,APIC] Do not use the local APIC timer. 2719 nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
@@ -3282,6 +3288,12 @@
3282 pt. [PARIDE] 3288 pt. [PARIDE]
3283 See Documentation/blockdev/paride.txt. 3289 See Documentation/blockdev/paride.txt.
3284 3290
3291 pti= [X86_64]
3292 Control user/kernel address space isolation:
3293 on - enable
3294 off - disable
3295 auto - default setting
3296
3285 pty.legacy_count= 3297 pty.legacy_count=
3286 [KNL] Number of legacy pty's. Overwrites compiled-in 3298 [KNL] Number of legacy pty's. Overwrites compiled-in
3287 default number. 3299 default number.
diff --git a/Documentation/admin-guide/thunderbolt.rst b/Documentation/admin-guide/thunderbolt.rst
index de50a8561774..9b55952039a6 100644
--- a/Documentation/admin-guide/thunderbolt.rst
+++ b/Documentation/admin-guide/thunderbolt.rst
@@ -230,7 +230,7 @@ If supported by your machine this will be exposed by the WMI bus with
230a sysfs attribute called "force_power". 230a sysfs attribute called "force_power".
231 231
232For example the intel-wmi-thunderbolt driver exposes this attribute in: 232For example the intel-wmi-thunderbolt driver exposes this attribute in:
233 /sys/devices/platform/PNP0C14:00/wmi_bus/wmi_bus-PNP0C14:00/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power 233 /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
234 234
235 To force the power to on, write 1 to this attribute file. 235 To force the power to on, write 1 to this attribute file.
236 To disable force power, write 0 to this attribute file. 236 To disable force power, write 0 to this attribute file.
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 304bf22bb83c..fc1c884fea10 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -75,3 +75,4 @@ stable kernels.
75| Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | 75| Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
76| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | 76| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
77| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 | 77| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 |
78| Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 |
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 779211fbb69f..2cddab7efb20 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -898,6 +898,13 @@ controller implements weight and absolute bandwidth limit models for
898normal scheduling policy and absolute bandwidth allocation model for 898normal scheduling policy and absolute bandwidth allocation model for
899realtime scheduling policy. 899realtime scheduling policy.
900 900
901WARNING: cgroup2 doesn't yet support control of realtime processes and
902the cpu controller can only be enabled when all RT processes are in
903the root cgroup. Be aware that system management software may already
904have placed RT processes into nonroot cgroups during the system boot
905process, and these processes may need to be moved to the root cgroup
906before the cpu controller can be enabled.
907
901 908
902CPU Interface Files 909CPU Interface Files
903~~~~~~~~~~~~~~~~~~~ 910~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
index 376fa2f50e6b..956bb046e599 100644
--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
+++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
@@ -13,7 +13,6 @@ Required properties:
13 at25df321a 13 at25df321a
14 at25df641 14 at25df641
15 at26df081a 15 at26df081a
16 en25s64
17 mr25h128 16 mr25h128
18 mr25h256 17 mr25h256
19 mr25h10 18 mr25h10
@@ -33,7 +32,6 @@ Required properties:
33 s25fl008k 32 s25fl008k
34 s25fl064k 33 s25fl064k
35 sst25vf040b 34 sst25vf040b
36 sst25wf040b
37 m25p40 35 m25p40
38 m25p80 36 m25p80
39 m25p16 37 m25p16
diff --git a/Documentation/devicetree/bindings/sound/da7218.txt b/Documentation/devicetree/bindings/sound/da7218.txt
index 5ca5a709b6aa..3ab9dfef38d1 100644
--- a/Documentation/devicetree/bindings/sound/da7218.txt
+++ b/Documentation/devicetree/bindings/sound/da7218.txt
@@ -73,7 +73,7 @@ Example:
73 compatible = "dlg,da7218"; 73 compatible = "dlg,da7218";
74 reg = <0x1a>; 74 reg = <0x1a>;
75 interrupt-parent = <&gpio6>; 75 interrupt-parent = <&gpio6>;
76 interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; 76 interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
77 wakeup-source; 77 wakeup-source;
78 78
79 VDD-supply = <&reg_audio>; 79 VDD-supply = <&reg_audio>;
diff --git a/Documentation/devicetree/bindings/sound/da7219.txt b/Documentation/devicetree/bindings/sound/da7219.txt
index cf61681826b6..5b54d2d045c3 100644
--- a/Documentation/devicetree/bindings/sound/da7219.txt
+++ b/Documentation/devicetree/bindings/sound/da7219.txt
@@ -77,7 +77,7 @@ Example:
77 reg = <0x1a>; 77 reg = <0x1a>;
78 78
79 interrupt-parent = <&gpio6>; 79 interrupt-parent = <&gpio6>;
80 interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; 80 interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
81 81
82 VDD-supply = <&reg_audio>; 82 VDD-supply = <&reg_audio>;
83 VDDMIC-supply = <&reg_audio>; 83 VDDMIC-supply = <&reg_audio>;
diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
index 5bf13960f7f4..e3c48b20b1a6 100644
--- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
+++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
@@ -12,24 +12,30 @@ Required properties:
12 - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc 12 - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
13- reg : Offset and length of the register set for the device 13- reg : Offset and length of the register set for the device
14- interrupts : Should contain CSPI/eCSPI interrupt 14- interrupts : Should contain CSPI/eCSPI interrupt
15- cs-gpios : Specifies the gpio pins to be used for chipselects.
16- clocks : Clock specifiers for both ipg and per clocks. 15- clocks : Clock specifiers for both ipg and per clocks.
17- clock-names : Clock names should include both "ipg" and "per" 16- clock-names : Clock names should include both "ipg" and "per"
18See the clock consumer binding, 17See the clock consumer binding,
19 Documentation/devicetree/bindings/clock/clock-bindings.txt 18 Documentation/devicetree/bindings/clock/clock-bindings.txt
20- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
21 Documentation/devicetree/bindings/dma/dma.txt
22- dma-names: DMA request names should include "tx" and "rx" if present.
23 19
24Obsolete properties: 20Recommended properties:
25- fsl,spi-num-chipselects : Contains the number of the chipselect 21- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt. While the native chip
22select lines can be used, they appear to always generate a pulse between each
23word of a transfer. Most use cases will require GPIO based chip selects to
24generate a valid transaction.
26 25
27Optional properties: 26Optional properties:
27- num-cs : Number of total chip selects, see spi-bus.txt.
28- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
29Documentation/devicetree/bindings/dma/dma.txt.
30- dma-names: DMA request names, if present, should include "tx" and "rx".
28- fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register 31- fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
29controlling the SPI_READY handling. Note that to enable the DRCTL consideration, 32controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
30the SPI_READY mode-flag needs to be set too. 33the SPI_READY mode-flag needs to be set too.
31Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst). 34Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).
32 35
36Obsolete properties:
37- fsl,spi-num-chipselects : Contains the number of the chipselect
38
33Example: 39Example:
34 40
35ecspi@70010000 { 41ecspi@70010000 {
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index 8caa60734647..e6a5f4912b6d 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -156,6 +156,40 @@ handle it in two different ways:
156 root of the overlay. Finally the directory is moved to the new 156 root of the overlay. Finally the directory is moved to the new
157 location. 157 location.
158 158
159There are several ways to tune the "redirect_dir" feature.
160
161Kernel config options:
162
163- OVERLAY_FS_REDIRECT_DIR:
164 If this is enabled, then redirect_dir is turned on by default.
165- OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW:
166 If this is enabled, then redirects are always followed by default. Enabling
167 this results in a less secure configuration. Enable this option only when
168 worried about backward compatibility with kernels that have the redirect_dir
169 feature and follow redirects even if turned off.
170
171Module options (can also be changed through /sys/module/overlay/parameters/*):
172
173- "redirect_dir=BOOL":
174 See OVERLAY_FS_REDIRECT_DIR kernel config option above.
175- "redirect_always_follow=BOOL":
176 See OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW kernel config option above.
177- "redirect_max=NUM":
178 The maximum number of bytes in an absolute redirect (default is 256).
179
180Mount options:
181
182- "redirect_dir=on":
183 Redirects are enabled.
184- "redirect_dir=follow":
185 Redirects are not created, but followed.
186- "redirect_dir=off":
187 Redirects are not created and only followed if "redirect_always_follow"
188 feature is enabled in the kernel/module config.
189- "redirect_dir=nofollow":
190 Redirects are not created and not followed (equivalent to "redirect_dir=off"
191 if "redirect_always_follow" feature is not enabled).
192
159Non-directories 193Non-directories
160--------------- 194---------------
161 195
diff --git a/Documentation/locking/crossrelease.txt b/Documentation/locking/crossrelease.txt
deleted file mode 100644
index bdf1423d5f99..000000000000
--- a/Documentation/locking/crossrelease.txt
+++ /dev/null
@@ -1,874 +0,0 @@
1Crossrelease
2============
3
4Started by Byungchul Park <byungchul.park@lge.com>
5
6Contents:
7
8 (*) Background
9
10 - What causes deadlock
11 - How lockdep works
12
13 (*) Limitation
14
15 - Limit lockdep
16 - Pros from the limitation
17 - Cons from the limitation
18 - Relax the limitation
19
20 (*) Crossrelease
21
22 - Introduce crossrelease
23 - Introduce commit
24
25 (*) Implementation
26
27 - Data structures
28 - How crossrelease works
29
30 (*) Optimizations
31
32 - Avoid duplication
33 - Lockless for hot paths
34
35 (*) APPENDIX A: What lockdep does to work aggresively
36
37 (*) APPENDIX B: How to avoid adding false dependencies
38
39
40==========
41Background
42==========
43
44What causes deadlock
45--------------------
46
47A deadlock occurs when a context is waiting for an event to happen,
48which is impossible because another (or the) context who can trigger the
49event is also waiting for another (or the) event to happen, which is
50also impossible due to the same reason.
51
52For example:
53
54 A context going to trigger event C is waiting for event A to happen.
55 A context going to trigger event A is waiting for event B to happen.
56 A context going to trigger event B is waiting for event C to happen.
57
58A deadlock occurs when these three wait operations run at the same time,
59because event C cannot be triggered if event A does not happen, which in
60turn cannot be triggered if event B does not happen, which in turn
61cannot be triggered if event C does not happen. After all, no event can
62be triggered since any of them never meets its condition to wake up.
63
64A dependency might exist between two waiters and a deadlock might happen
65due to an incorrect releationship between dependencies. Thus, we must
66define what a dependency is first. A dependency exists between them if:
67
68 1. There are two waiters waiting for each event at a given time.
69 2. The only way to wake up each waiter is to trigger its event.
70 3. Whether one can be woken up depends on whether the other can.
71
72Each wait in the example creates its dependency like:
73
74 Event C depends on event A.
75 Event A depends on event B.
76 Event B depends on event C.
77
78 NOTE: Precisely speaking, a dependency is one between whether a
79 waiter for an event can be woken up and whether another waiter for
80 another event can be woken up. However from now on, we will describe
81 a dependency as if it's one between an event and another event for
82 simplicity.
83
84And they form circular dependencies like:
85
86 -> C -> A -> B -
87 / \
88 \ /
89 ----------------
90
91 where 'A -> B' means that event A depends on event B.
92
93Such circular dependencies lead to a deadlock since no waiter can meet
94its condition to wake up as described.
95
96CONCLUSION
97
98Circular dependencies cause a deadlock.
99
100
101How lockdep works
102-----------------
103
104Lockdep tries to detect a deadlock by checking dependencies created by
105lock operations, acquire and release. Waiting for a lock corresponds to
106waiting for an event, and releasing a lock corresponds to triggering an
107event in the previous section.
108
109In short, lockdep does:
110
111 1. Detect a new dependency.
112 2. Add the dependency into a global graph.
113 3. Check if that makes dependencies circular.
114 4. Report a deadlock or its possibility if so.
115
116For example, consider a graph built by lockdep that looks like:
117
118 A -> B -
119 \
120 -> E
121 /
122 C -> D -
123
124 where A, B,..., E are different lock classes.
125
126Lockdep will add a dependency into the graph on detection of a new
127dependency. For example, it will add a dependency 'E -> C' when a new
128dependency between lock E and lock C is detected. Then the graph will be:
129
130 A -> B -
131 \
132 -> E -
133 / \
134 -> C -> D - \
135 / /
136 \ /
137 ------------------
138
139 where A, B,..., E are different lock classes.
140
141This graph contains a subgraph which demonstrates circular dependencies:
142
143 -> E -
144 / \
145 -> C -> D - \
146 / /
147 \ /
148 ------------------
149
150 where C, D and E are different lock classes.
151
152This is the condition under which a deadlock might occur. Lockdep
153reports it on detection after adding a new dependency. This is the way
154how lockdep works.
155
156CONCLUSION
157
158Lockdep detects a deadlock or its possibility by checking if circular
159dependencies were created after adding each new dependency.
160
161
162==========
163Limitation
164==========
165
166Limit lockdep
167-------------
168
169Limiting lockdep to work on only typical locks e.g. spin locks and
170mutexes, which are released within the acquire context, the
171implementation becomes simple but its capacity for detection becomes
172limited. Let's check pros and cons in next section.
173
174
175Pros from the limitation
176------------------------
177
178Given the limitation, when acquiring a lock, locks in a held_locks
179cannot be released if the context cannot acquire it so has to wait to
180acquire it, which means all waiters for the locks in the held_locks are
181stuck. It's an exact case to create dependencies between each lock in
182the held_locks and the lock to acquire.
183
184For example:
185
186 CONTEXT X
187 ---------
188 acquire A
189 acquire B /* Add a dependency 'A -> B' */
190 release B
191 release A
192
193 where A and B are different lock classes.
194
195When acquiring lock A, the held_locks of CONTEXT X is empty thus no
196dependency is added. But when acquiring lock B, lockdep detects and adds
197a new dependency 'A -> B' between lock A in the held_locks and lock B.
198They can be simply added whenever acquiring each lock.
199
200And data required by lockdep exists in a local structure, held_locks
201embedded in task_struct. Forcing to access the data within the context,
202lockdep can avoid racy problems without explicit locks while handling
203the local data.
204
205Lastly, lockdep only needs to keep locks currently being held, to build
206a dependency graph. However, relaxing the limitation, it needs to keep
207even locks already released, because a decision whether they created
208dependencies might be long-deferred.
209
210To sum up, we can expect several advantages from the limitation:
211
212 1. Lockdep can easily identify a dependency when acquiring a lock.
213 2. Races are avoidable while accessing local locks in a held_locks.
214 3. Lockdep only needs to keep locks currently being held.
215
216CONCLUSION
217
218Given the limitation, the implementation becomes simple and efficient.
219
220
221Cons from the limitation
222------------------------
223
224Given the limitation, lockdep is applicable only to typical locks. For
225example, page locks for page access or completions for synchronization
226cannot work with lockdep.
227
228Can we detect deadlocks below, under the limitation?
229
230Example 1:
231
232 CONTEXT X CONTEXT Y CONTEXT Z
233 --------- --------- ----------
234 mutex_lock A
235 lock_page B
236 lock_page B
237 mutex_lock A /* DEADLOCK */
238 unlock_page B held by X
239 unlock_page B
240 mutex_unlock A
241 mutex_unlock A
242
243 where A and B are different lock classes.
244
245No, we cannot.
246
247Example 2:
248
249 CONTEXT X CONTEXT Y
250 --------- ---------
251 mutex_lock A
252 mutex_lock A
253 wait_for_complete B /* DEADLOCK */
254 complete B
255 mutex_unlock A
256 mutex_unlock A
257
258 where A is a lock class and B is a completion variable.
259
260No, we cannot.
261
262CONCLUSION
263
264Given the limitation, lockdep cannot detect a deadlock or its
265possibility caused by page locks or completions.
266
267
268Relax the limitation
269--------------------
270
271Under the limitation, things to create dependencies are limited to
272typical locks. However, synchronization primitives like page locks and
273completions, which are allowed to be released in any context, also
274create dependencies and can cause a deadlock. So lockdep should track
275these locks to do a better job. We have to relax the limitation for
276these locks to work with lockdep.
277
278Detecting dependencies is very important for lockdep to work because
279adding a dependency means adding an opportunity to check whether it
280causes a deadlock. The more lockdep adds dependencies, the more it
281thoroughly works. Thus Lockdep has to do its best to detect and add as
282many true dependencies into a graph as possible.
283
284For example, considering only typical locks, lockdep builds a graph like:
285
286 A -> B -
287 \
288 -> E
289 /
290 C -> D -
291
292 where A, B,..., E are different lock classes.
293
294On the other hand, under the relaxation, additional dependencies might
295be created and added. Assuming additional 'FX -> C' and 'E -> GX' are
296added thanks to the relaxation, the graph will be:
297
298 A -> B -
299 \
300 -> E -> GX
301 /
302 FX -> C -> D -
303
304 where A, B,..., E, FX and GX are different lock classes, and a suffix
305 'X' is added on non-typical locks.
306
307The latter graph gives us more chances to check circular dependencies
308than the former. However, it might suffer performance degradation since
309relaxing the limitation, with which design and implementation of lockdep
310can be efficient, might introduce inefficiency inevitably. So lockdep
311should provide two options, strong detection and efficient detection.
312
313Choosing efficient detection:
314
315 Lockdep works with only locks restricted to be released within the
316 acquire context. However, lockdep works efficiently.
317
318Choosing strong detection:
319
320 Lockdep works with all synchronization primitives. However, lockdep
321 suffers performance degradation.
322
323CONCLUSION
324
325Relaxing the limitation, lockdep can add additional dependencies giving
326additional opportunities to check circular dependencies.
327
328
329============
330Crossrelease
331============
332
333Introduce crossrelease
334----------------------
335
336In order to allow lockdep to handle additional dependencies by what
337might be released in any context, namely 'crosslock', we have to be able
338to identify those created by crosslocks. The proposed 'crossrelease'
339feature provoides a way to do that.
340
341Crossrelease feature has to do:
342
343 1. Identify dependencies created by crosslocks.
344 2. Add the dependencies into a dependency graph.
345
346That's all. Once a meaningful dependency is added into graph, then
347lockdep would work with the graph as it did. The most important thing
348crossrelease feature has to do is to correctly identify and add true
349dependencies into the global graph.
350
351A dependency e.g. 'A -> B' can be identified only in the A's release
352context because a decision required to identify the dependency can be
353made only in the release context. That is to decide whether A can be
354released so that a waiter for A can be woken up. It cannot be made in
355other than the A's release context.
356
357It's no matter for typical locks because each acquire context is same as
358its release context, thus lockdep can decide whether a lock can be
359released in the acquire context. However for crosslocks, lockdep cannot
360make the decision in the acquire context but has to wait until the
361release context is identified.
362
363Therefore, deadlocks by crosslocks cannot be detected just when it
364happens, because those cannot be identified until the crosslocks are
365released. However, deadlock possibilities can be detected and it's very
366worth. See 'APPENDIX A' section to check why.
367
368CONCLUSION
369
370Using crossrelease feature, lockdep can work with what might be released
371in any context, namely crosslock.
372
373
374Introduce commit
375----------------
376
377Since crossrelease defers the work adding true dependencies of
378crosslocks until they are actually released, crossrelease has to queue
379all acquisitions which might create dependencies with the crosslocks.
380Then it identifies dependencies using the queued data in batches at a
381proper time. We call it 'commit'.
382
383There are four types of dependencies:
384
3851. TT type: 'typical lock A -> typical lock B'
386
387 Just when acquiring B, lockdep can see it's in the A's release
388 context. So the dependency between A and B can be identified
389 immediately. Commit is unnecessary.
390
3912. TC type: 'typical lock A -> crosslock BX'
392
393 Just when acquiring BX, lockdep can see it's in the A's release
394 context. So the dependency between A and BX can be identified
395 immediately. Commit is unnecessary, too.
396
3973. CT type: 'crosslock AX -> typical lock B'
398
399 When acquiring B, lockdep cannot identify the dependency because
400 there's no way to know if it's in the AX's release context. It has
401 to wait until the decision can be made. Commit is necessary.
402
4034. CC type: 'crosslock AX -> crosslock BX'
404
405 When acquiring BX, lockdep cannot identify the dependency because
406 there's no way to know if it's in the AX's release context. It has
407 to wait until the decision can be made. Commit is necessary.
408 But, handling CC type is not implemented yet. It's a future work.
409
410Lockdep can work without commit for typical locks, but commit step is
411necessary once crosslocks are involved. Introducing commit, lockdep
412performs three steps. What lockdep does in each step is:
413
4141. Acquisition: For typical locks, lockdep does what it originally did
415 and queues the lock so that CT type dependencies can be checked using
416 it at the commit step. For crosslocks, it saves data which will be
417 used at the commit step and increases a reference count for it.
418
4192. Commit: No action is reauired for typical locks. For crosslocks,
420 lockdep adds CT type dependencies using the data saved at the
421 acquisition step.
422
4233. Release: No changes are required for typical locks. When a crosslock
424 is released, it decreases a reference count for it.
425
426CONCLUSION
427
428Crossrelease introduces commit step to handle dependencies of crosslocks
429in batches at a proper time.
430
431
432==============
433Implementation
434==============
435
436Data structures
437---------------
438
439Crossrelease introduces two main data structures.
440
4411. hist_lock
442
443 This is an array embedded in task_struct, for keeping lock history so
444 that dependencies can be added using them at the commit step. Since
445 it's local data, it can be accessed locklessly in the owner context.
446 The array is filled at the acquisition step and consumed at the
447 commit step. And it's managed in circular manner.
448
4492. cross_lock
450
451 One per lockdep_map exists. This is for keeping data of crosslocks
452 and used at the commit step.
453
454
455How crossrelease works
456----------------------
457
458It's the key of how crossrelease works, to defer necessary works to an
459appropriate point in time and perform in at once at the commit step.
460Let's take a look with examples step by step, starting from how lockdep
461works without crossrelease for typical locks.
462
463 acquire A /* Push A onto held_locks */
464 acquire B /* Push B onto held_locks and add 'A -> B' */
465 acquire C /* Push C onto held_locks and add 'B -> C' */
466 release C /* Pop C from held_locks */
467 release B /* Pop B from held_locks */
468 release A /* Pop A from held_locks */
469
470 where A, B and C are different lock classes.
471
472 NOTE: This document assumes that readers already understand how
473 lockdep works without crossrelease thus omits details. But there's
474 one thing to note. Lockdep pretends to pop a lock from held_locks
475 when releasing it. But it's subtly different from the original pop
476 operation because lockdep allows other than the top to be poped.
477
478In this case, lockdep adds 'the top of held_locks -> the lock to acquire'
479dependency every time acquiring a lock.
480
481After adding 'A -> B', a dependency graph will be:
482
483 A -> B
484
485 where A and B are different lock classes.
486
487And after adding 'B -> C', the graph will be:
488
489 A -> B -> C
490
491 where A, B and C are different lock classes.
492
493Let's performs commit step even for typical locks to add dependencies.
494Of course, commit step is not necessary for them, however, it would work
495well because this is a more general way.
496
497 acquire A
498 /*
499 * Queue A into hist_locks
500 *
501 * In hist_locks: A
502 * In graph: Empty
503 */
504
505 acquire B
506 /*
507 * Queue B into hist_locks
508 *
509 * In hist_locks: A, B
510 * In graph: Empty
511 */
512
513 acquire C
514 /*
515 * Queue C into hist_locks
516 *
517 * In hist_locks: A, B, C
518 * In graph: Empty
519 */
520
521 commit C
522 /*
523 * Add 'C -> ?'
524 * Answer the following to decide '?'
525 * What has been queued since acquire C: Nothing
526 *
527 * In hist_locks: A, B, C
528 * In graph: Empty
529 */
530
531 release C
532
533 commit B
534 /*
535 * Add 'B -> ?'
536 * Answer the following to decide '?'
537 * What has been queued since acquire B: C
538 *
539 * In hist_locks: A, B, C
540 * In graph: 'B -> C'
541 */
542
543 release B
544
545 commit A
546 /*
547 * Add 'A -> ?'
548 * Answer the following to decide '?'
549 * What has been queued since acquire A: B, C
550 *
551 * In hist_locks: A, B, C
552 * In graph: 'B -> C', 'A -> B', 'A -> C'
553 */
554
555 release A
556
557 where A, B and C are different lock classes.
558
559In this case, dependencies are added at the commit step as described.
560
561After commits for A, B and C, the graph will be:
562
563 A -> B -> C
564
565 where A, B and C are different lock classes.
566
567 NOTE: A dependency 'A -> C' is optimized out.
568
569We can see the former graph built without commit step is same as the
570latter graph built using commit steps. Of course the former way leads to
571earlier finish for building the graph, which means we can detect a
572deadlock or its possibility sooner. So the former way would be prefered
573when possible. But we cannot avoid using the latter way for crosslocks.
574
575Let's look at how commit steps work for crosslocks. In this case, the
576commit step is performed only on crosslock AX as real. And it assumes
577that the AX release context is different from the AX acquire context.
578
579 BX RELEASE CONTEXT BX ACQUIRE CONTEXT
580 ------------------ ------------------
581 acquire A
582 /*
583 * Push A onto held_locks
584 * Queue A into hist_locks
585 *
586 * In held_locks: A
587 * In hist_locks: A
588 * In graph: Empty
589 */
590
591 acquire BX
592 /*
593 * Add 'the top of held_locks -> BX'
594 *
595 * In held_locks: A
596 * In hist_locks: A
597 * In graph: 'A -> BX'
598 */
599
600 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
601 It must be guaranteed that the following operations are seen after
602 acquiring BX globally. It can be done by things like barrier.
603 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
604
605 acquire C
606 /*
607 * Push C onto held_locks
608 * Queue C into hist_locks
609 *
610 * In held_locks: C
611 * In hist_locks: C
612 * In graph: 'A -> BX'
613 */
614
615 release C
616 /*
617 * Pop C from held_locks
618 *
619 * In held_locks: Empty
620 * In hist_locks: C
621 * In graph: 'A -> BX'
622 */
623 acquire D
624 /*
625 * Push D onto held_locks
626 * Queue D into hist_locks
627 * Add 'the top of held_locks -> D'
628 *
629 * In held_locks: A, D
630 * In hist_locks: A, D
631 * In graph: 'A -> BX', 'A -> D'
632 */
633 acquire E
634 /*
635 * Push E onto held_locks
636 * Queue E into hist_locks
637 *
638 * In held_locks: E
639 * In hist_locks: C, E
640 * In graph: 'A -> BX', 'A -> D'
641 */
642
643 release E
644 /*
645 * Pop E from held_locks
646 *
647 * In held_locks: Empty
648 * In hist_locks: D, E
649 * In graph: 'A -> BX', 'A -> D'
650 */
651 release D
652 /*
653 * Pop D from held_locks
654 *
655 * In held_locks: A
656 * In hist_locks: A, D
657 * In graph: 'A -> BX', 'A -> D'
658 */
659 commit BX
660 /*
661 * Add 'BX -> ?'
662 * What has been queued since acquire BX: C, E
663 *
664 * In held_locks: Empty
665 * In hist_locks: D, E
666 * In graph: 'A -> BX', 'A -> D',
667 * 'BX -> C', 'BX -> E'
668 */
669
670 release BX
671 /*
672 * In held_locks: Empty
673 * In hist_locks: D, E
674 * In graph: 'A -> BX', 'A -> D',
675 * 'BX -> C', 'BX -> E'
676 */
677 release A
678 /*
679 * Pop A from held_locks
680 *
681 * In held_locks: Empty
682 * In hist_locks: A, D
683 * In graph: 'A -> BX', 'A -> D',
684 * 'BX -> C', 'BX -> E'
685 */
686
687 where A, BX, C,..., E are different lock classes, and a suffix 'X' is
688 added on crosslocks.
689
690Crossrelease considers all acquisitions after acqiuring BX are
691candidates which might create dependencies with BX. True dependencies
692will be determined when identifying the release context of BX. Meanwhile,
693all typical locks are queued so that they can be used at the commit step.
694And then two dependencies 'BX -> C' and 'BX -> E' are added at the
695commit step when identifying the release context.
696
697The final graph will be, with crossrelease:
698
699 -> C
700 /
701 -> BX -
702 / \
703 A - -> E
704 \
705 -> D
706
707 where A, BX, C,..., E are different lock classes, and a suffix 'X' is
708 added on crosslocks.
709
710However, the final graph will be, without crossrelease:
711
712 A -> D
713
714 where A and D are different lock classes.
715
716The former graph has three more dependencies, 'A -> BX', 'BX -> C' and
717'BX -> E' giving additional opportunities to check if they cause
718deadlocks. This way lockdep can detect a deadlock or its possibility
719caused by crosslocks.
720
721CONCLUSION
722
723We checked how crossrelease works with several examples.
724
725
726=============
727Optimizations
728=============
729
730Avoid duplication
731-----------------
732
733Crossrelease feature uses a cache like what lockdep already uses for
734dependency chains, but this time it's for caching CT type dependencies.
735Once that dependency is cached, the same will never be added again.
736
737
738Lockless for hot paths
739----------------------
740
741To keep all locks for later use at the commit step, crossrelease adopts
742a local array embedded in task_struct, which makes access to the data
743lockless by forcing it to happen only within the owner context. It's
744like how lockdep handles held_locks. Lockless implmentation is important
745since typical locks are very frequently acquired and released.
746
747
748=================================================
749APPENDIX A: What lockdep does to work aggresively
750=================================================
751
752A deadlock actually occurs when all wait operations creating circular
753dependencies run at the same time. Even though they don't, a potential
754deadlock exists if the problematic dependencies exist. Thus it's
755meaningful to detect not only an actual deadlock but also its potential
756possibility. The latter is rather valuable. When a deadlock occurs
757actually, we can identify what happens in the system by some means or
758other even without lockdep. However, there's no way to detect possiblity
759without lockdep unless the whole code is parsed in head. It's terrible.
760Lockdep does the both, and crossrelease only focuses on the latter.
761
762Whether or not a deadlock actually occurs depends on several factors.
763For example, what order contexts are switched in is a factor. Assuming
764circular dependencies exist, a deadlock would occur when contexts are
765switched so that all wait operations creating the dependencies run
766simultaneously. Thus to detect a deadlock possibility even in the case
767that it has not occured yet, lockdep should consider all possible
768combinations of dependencies, trying to:
769
7701. Use a global dependency graph.
771
772 Lockdep combines all dependencies into one global graph and uses them,
773 regardless of which context generates them or what order contexts are
774 switched in. Aggregated dependencies are only considered so they are
775 prone to be circular if a problem exists.
776
7772. Check dependencies between classes instead of instances.
778
779 What actually causes a deadlock are instances of lock. However,
780 lockdep checks dependencies between classes instead of instances.
781 This way lockdep can detect a deadlock which has not happened but
782 might happen in future by others but the same class.
783
7843. Assume all acquisitions lead to waiting.
785
786 Although locks might be acquired without waiting which is essential
787 to create dependencies, lockdep assumes all acquisitions lead to
788 waiting since it might be true some time or another.
789
790CONCLUSION
791
792Lockdep detects not only an actual deadlock but also its possibility,
793and the latter is more valuable.
794
795
796==================================================
797APPENDIX B: How to avoid adding false dependencies
798==================================================
799
800Remind what a dependency is. A dependency exists if:
801
802 1. There are two waiters waiting for each event at a given time.
803 2. The only way to wake up each waiter is to trigger its event.
804 3. Whether one can be woken up depends on whether the other can.
805
806For example:
807
808 acquire A
809 acquire B /* A dependency 'A -> B' exists */
810 release B
811 release A
812
813 where A and B are different lock classes.
814
815A depedency 'A -> B' exists since:
816
817 1. A waiter for A and a waiter for B might exist when acquiring B.
818 2. Only way to wake up each is to release what it waits for.
819 3. Whether the waiter for A can be woken up depends on whether the
820 other can. IOW, TASK X cannot release A if it fails to acquire B.
821
822For another example:
823
824 TASK X TASK Y
825 ------ ------
826 acquire AX
827 acquire B /* A dependency 'AX -> B' exists */
828 release B
829 release AX held by Y
830
831 where AX and B are different lock classes, and a suffix 'X' is added
832 on crosslocks.
833
834Even in this case involving crosslocks, the same rule can be applied. A
835depedency 'AX -> B' exists since:
836
837 1. A waiter for AX and a waiter for B might exist when acquiring B.
838 2. Only way to wake up each is to release what it waits for.
839 3. Whether the waiter for AX can be woken up depends on whether the
840 other can. IOW, TASK X cannot release AX if it fails to acquire B.
841
842Let's take a look at more complicated example:
843
844 TASK X TASK Y
845 ------ ------
846 acquire B
847 release B
848 fork Y
849 acquire AX
850 acquire C /* A dependency 'AX -> C' exists */
851 release C
852 release AX held by Y
853
854 where AX, B and C are different lock classes, and a suffix 'X' is
855 added on crosslocks.
856
857Does a dependency 'AX -> B' exist? Nope.
858
859Two waiters are essential to create a dependency. However, waiters for
860AX and B to create 'AX -> B' cannot exist at the same time in this
861example. Thus the dependency 'AX -> B' cannot be created.
862
863It would be ideal if the full set of true ones can be considered. But
864we can ensure nothing but what actually happened. Relying on what
865actually happens at runtime, we can anyway add only true ones, though
866they might be a subset of true ones. It's similar to how lockdep works
867for typical locks. There might be more true dependencies than what
868lockdep has detected in runtime. Lockdep has no choice but to rely on
869what actually happens. Crossrelease also relies on it.
870
871CONCLUSION
872
873Relying on what actually happens, lockdep can avoid adding false
874dependencies.
diff --git a/Documentation/vm/zswap.txt b/Documentation/vm/zswap.txt
index 89fff7d611cc..0b3a1148f9f0 100644
--- a/Documentation/vm/zswap.txt
+++ b/Documentation/vm/zswap.txt
@@ -98,5 +98,25 @@ request is made for a page in an old zpool, it is uncompressed using its
98original compressor. Once all pages are removed from an old zpool, the zpool 98original compressor. Once all pages are removed from an old zpool, the zpool
99and its compressor are freed. 99and its compressor are freed.
100 100
101Some of the pages in zswap are same-value filled pages (i.e. contents of the
102page have same value or repetitive pattern). These pages include zero-filled
103pages and they are handled differently. During store operation, a page is
104checked if it is a same-value filled page before compressing it. If true, the
105compressed length of the page is set to zero and the pattern or same-filled
106value is stored.
107
108Same-value filled pages identification feature is enabled by default and can be
109disabled at boot time by setting the "same_filled_pages_enabled" attribute to 0,
110e.g. zswap.same_filled_pages_enabled=0. It can also be enabled and disabled at
111runtime using the sysfs "same_filled_pages_enabled" attribute, e.g.
112
113echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
114
115When zswap same-filled page identification is disabled at runtime, it will stop
116checking for the same-value filled pages during store operation. However, the
117existing pages which are marked as same-value filled pages remain stored
118unchanged in zswap until they are either loaded or invalidated.
119
101A debugfs interface is provided for various statistic about pool size, number 120A debugfs interface is provided for various statistic about pool size, number
102of pages stored, and various counters for the reasons pages are rejected. 121of pages stored, same-value filled pages and various counters for the reasons
122pages are rejected.
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 3448e675b462..ad41b3813f0a 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -1,6 +1,4 @@
1 1
2<previous description obsolete, deleted>
3
4Virtual memory map with 4 level page tables: 2Virtual memory map with 4 level page tables:
5 3
60000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm 40000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
@@ -14,13 +12,16 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
14... unused hole ... 12... unused hole ...
15ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) 13ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
16... unused hole ... 14... unused hole ...
15fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
16fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
17ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks 17ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
18... unused hole ... 18... unused hole ...
19ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space 19ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
20... unused hole ... 20... unused hole ...
21ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 21ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
22ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable) 22ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space (variable)
23ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls 23[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
24ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
24ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole 25ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
25 26
26Virtual memory map with 5 level page tables: 27Virtual memory map with 5 level page tables:
@@ -29,26 +30,29 @@ Virtual memory map with 5 level page tables:
29hole caused by [56:63] sign extension 30hole caused by [56:63] sign extension
30ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor 31ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
31ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory 32ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
32ff90000000000000 - ff91ffffffffffff (=49 bits) hole 33ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI
33ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space 34ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
34ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole 35ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
35ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) 36ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
36... unused hole ... 37... unused hole ...
37ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) 38ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
38... unused hole ... 39... unused hole ...
40fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
39ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks 41ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
40... unused hole ... 42... unused hole ...
41ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space 43ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
42... unused hole ... 44... unused hole ...
43ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 45ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
44ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space 46ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space
45ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls 47[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
48ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
46ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole 49ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
47 50
48Architecture defines a 64-bit virtual address. Implementations can support 51Architecture defines a 64-bit virtual address. Implementations can support
49less. Currently supported are 48- and 57-bit virtual addresses. Bits 63 52less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
50through to the most-significant implemented bit are set to either all ones 53through to the most-significant implemented bit are sign extended.
51or all zero. This causes hole between user space and kernel addresses. 54This causes hole between user space and kernel addresses if you interpret them
55as unsigned.
52 56
53The direct mapping covers all memory in the system up to the highest 57The direct mapping covers all memory in the system up to the highest
54memory address (this means in some cases it can also include PCI memory 58memory address (this means in some cases it can also include PCI memory
@@ -58,9 +62,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
58the processes using the page fault handler, with init_top_pgt as 62the processes using the page fault handler, with init_top_pgt as
59reference. 63reference.
60 64
61Current X86-64 implementations support up to 46 bits of address space (64 TB),
62which is our current limit. This expands into MBZ space in the page tables.
63
64We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual 65We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
65memory window (this size is arbitrary, it can be raised later if needed). 66memory window (this size is arbitrary, it can be raised later if needed).
66The mappings are not part of any other kernel PGD and are only available 67The mappings are not part of any other kernel PGD and are only available
@@ -72,5 +73,3 @@ following fixmap section.
72Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all 73Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
73physical memory, vmalloc/ioremap space and virtual memory map are randomized. 74physical memory, vmalloc/ioremap space and virtual memory map are randomized.
74Their order is preserved but their base will be offset early at boot time. 75Their order is preserved but their base will be offset early at boot time.
75
76-Andi Kleen, Jul 2004
diff --git a/MAINTAINERS b/MAINTAINERS
index 37bb3ad34886..e3ed91e868c0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2621,24 +2621,22 @@ F: fs/bfs/
2621F: include/uapi/linux/bfs_fs.h 2621F: include/uapi/linux/bfs_fs.h
2622 2622
2623BLACKFIN ARCHITECTURE 2623BLACKFIN ARCHITECTURE
2624M: Steven Miao <realmz6@gmail.com>
2625L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2624L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2626T: git git://git.code.sf.net/p/adi-linux/code 2625T: git git://git.code.sf.net/p/adi-linux/code
2627W: http://blackfin.uclinux.org 2626W: http://blackfin.uclinux.org
2628S: Supported 2627S: Orphan
2629F: arch/blackfin/ 2628F: arch/blackfin/
2630 2629
2631BLACKFIN EMAC DRIVER 2630BLACKFIN EMAC DRIVER
2632L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2631L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2633W: http://blackfin.uclinux.org 2632W: http://blackfin.uclinux.org
2634S: Supported 2633S: Orphan
2635F: drivers/net/ethernet/adi/ 2634F: drivers/net/ethernet/adi/
2636 2635
2637BLACKFIN MEDIA DRIVER 2636BLACKFIN MEDIA DRIVER
2638M: Scott Jiang <scott.jiang.linux@gmail.com>
2639L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2637L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2640W: http://blackfin.uclinux.org/ 2638W: http://blackfin.uclinux.org/
2641S: Supported 2639S: Orphan
2642F: drivers/media/platform/blackfin/ 2640F: drivers/media/platform/blackfin/
2643F: drivers/media/i2c/adv7183* 2641F: drivers/media/i2c/adv7183*
2644F: drivers/media/i2c/vs6624* 2642F: drivers/media/i2c/vs6624*
@@ -2646,25 +2644,25 @@ F: drivers/media/i2c/vs6624*
2646BLACKFIN RTC DRIVER 2644BLACKFIN RTC DRIVER
2647L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2645L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2648W: http://blackfin.uclinux.org 2646W: http://blackfin.uclinux.org
2649S: Supported 2647S: Orphan
2650F: drivers/rtc/rtc-bfin.c 2648F: drivers/rtc/rtc-bfin.c
2651 2649
2652BLACKFIN SDH DRIVER 2650BLACKFIN SDH DRIVER
2653L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2651L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2654W: http://blackfin.uclinux.org 2652W: http://blackfin.uclinux.org
2655S: Supported 2653S: Orphan
2656F: drivers/mmc/host/bfin_sdh.c 2654F: drivers/mmc/host/bfin_sdh.c
2657 2655
2658BLACKFIN SERIAL DRIVER 2656BLACKFIN SERIAL DRIVER
2659L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2657L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2660W: http://blackfin.uclinux.org 2658W: http://blackfin.uclinux.org
2661S: Supported 2659S: Orphan
2662F: drivers/tty/serial/bfin_uart.c 2660F: drivers/tty/serial/bfin_uart.c
2663 2661
2664BLACKFIN WATCHDOG DRIVER 2662BLACKFIN WATCHDOG DRIVER
2665L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2663L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2666W: http://blackfin.uclinux.org 2664W: http://blackfin.uclinux.org
2667S: Supported 2665S: Orphan
2668F: drivers/watchdog/bfin_wdt.c 2666F: drivers/watchdog/bfin_wdt.c
2669 2667
2670BLINKM RGB LED DRIVER 2668BLINKM RGB LED DRIVER
@@ -5431,7 +5429,7 @@ F: drivers/media/tuners/fc2580*
5431 5429
5432FCOE SUBSYSTEM (libfc, libfcoe, fcoe) 5430FCOE SUBSYSTEM (libfc, libfcoe, fcoe)
5433M: Johannes Thumshirn <jth@kernel.org> 5431M: Johannes Thumshirn <jth@kernel.org>
5434L: fcoe-devel@open-fcoe.org 5432L: linux-scsi@vger.kernel.org
5435W: www.Open-FCoE.org 5433W: www.Open-FCoE.org
5436S: Supported 5434S: Supported
5437F: drivers/scsi/libfc/ 5435F: drivers/scsi/libfc/
@@ -13141,6 +13139,7 @@ F: drivers/dma/dw/
13141 13139
13142SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER 13140SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER
13143M: Jie Deng <jiedeng@synopsys.com> 13141M: Jie Deng <jiedeng@synopsys.com>
13142M: Jose Abreu <Jose.Abreu@synopsys.com>
13144L: netdev@vger.kernel.org 13143L: netdev@vger.kernel.org
13145S: Supported 13144S: Supported
13146F: drivers/net/ethernet/synopsys/ 13145F: drivers/net/ethernet/synopsys/
@@ -13516,6 +13515,7 @@ M: Mika Westerberg <mika.westerberg@linux.intel.com>
13516M: Yehezkel Bernat <yehezkel.bernat@intel.com> 13515M: Yehezkel Bernat <yehezkel.bernat@intel.com>
13517T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git 13516T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
13518S: Maintained 13517S: Maintained
13518F: Documentation/admin-guide/thunderbolt.rst
13519F: drivers/thunderbolt/ 13519F: drivers/thunderbolt/
13520F: include/linux/thunderbolt.h 13520F: include/linux/thunderbolt.h
13521 13521
diff --git a/Makefile b/Makefile
index 3f4d157add54..eb1f5973813e 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
2VERSION = 4 2VERSION = 4
3PATCHLEVEL = 15 3PATCHLEVEL = 15
4SUBLEVEL = 0 4SUBLEVEL = 0
5EXTRAVERSION = -rc3 5EXTRAVERSION = -rc6
6NAME = Fearless Coyote 6NAME = Fearless Coyote
7 7
8# *DOCUMENTATION* 8# *DOCUMENTATION*
@@ -789,6 +789,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
789# disable invalid "can't wrap" optimizations for signed / pointers 789# disable invalid "can't wrap" optimizations for signed / pointers
790KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) 790KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
791 791
792# Make sure -fstack-check isn't enabled (like gentoo apparently did)
793KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,)
794
792# conserve stack if available 795# conserve stack if available
793KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) 796KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)
794 797
diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
index fbb3758ca2e3..4b8edc8982cf 100644
--- a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
+++ b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
@@ -121,7 +121,7 @@
121 switch0port10: port@10 { 121 switch0port10: port@10 {
122 reg = <10>; 122 reg = <10>;
123 label = "dsa"; 123 label = "dsa";
124 phy-mode = "xgmii"; 124 phy-mode = "xaui";
125 link = <&switch1port10>; 125 link = <&switch1port10>;
126 }; 126 };
127 }; 127 };
@@ -208,7 +208,7 @@
208 switch1port10: port@10 { 208 switch1port10: port@10 {
209 reg = <10>; 209 reg = <10>;
210 label = "dsa"; 210 label = "dsa";
211 phy-mode = "xgmii"; 211 phy-mode = "xaui";
212 link = <&switch0port10>; 212 link = <&switch0port10>;
213 }; 213 };
214 }; 214 };
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 1712f132b80d..b83fdc06286a 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -85,7 +85,11 @@
85 .pushsection .text.fixup,"ax" 85 .pushsection .text.fixup,"ax"
86 .align 4 86 .align 4
879001: mov r4, #-EFAULT 879001: mov r4, #-EFAULT
88#ifdef CONFIG_CPU_SW_DOMAIN_PAN
89 ldr r5, [sp, #9*4] @ *err_ptr
90#else
88 ldr r5, [sp, #8*4] @ *err_ptr 91 ldr r5, [sp, #8*4] @ *err_ptr
92#endif
89 str r4, [r5] 93 str r4, [r5]
90 ldmia sp, {r1, r2} @ retrieve dst, len 94 ldmia sp, {r1, r2} @ retrieve dst, len
91 add r2, r2, r1 95 add r2, r2, r1
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a93339f5178f..c9a7e9e1414f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -557,7 +557,6 @@ config QCOM_QDF2400_ERRATUM_0065
557 557
558 If unsure, say Y. 558 If unsure, say Y.
559 559
560
561config SOCIONEXT_SYNQUACER_PREITS 560config SOCIONEXT_SYNQUACER_PREITS
562 bool "Socionext Synquacer: Workaround for GICv3 pre-ITS" 561 bool "Socionext Synquacer: Workaround for GICv3 pre-ITS"
563 default y 562 default y
@@ -576,6 +575,17 @@ config HISILICON_ERRATUM_161600802
576 a 128kB offset to be applied to the target address in this commands. 575 a 128kB offset to be applied to the target address in this commands.
577 576
578 If unsure, say Y. 577 If unsure, say Y.
578
579config QCOM_FALKOR_ERRATUM_E1041
580 bool "Falkor E1041: Speculative instruction fetches might cause errant memory access"
581 default y
582 help
583 Falkor CPU may speculatively fetch instructions from an improper
584 memory location when MMU translation is changed from SCTLR_ELn[M]=1
585 to SCTLR_ELn[M]=0. Prefix an ISB instruction to fix the problem.
586
587 If unsure, say Y.
588
579endmenu 589endmenu
580 590
581 591
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index aef72d886677..8b168280976f 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -512,4 +512,14 @@ alternative_else_nop_endif
512#endif 512#endif
513 .endm 513 .endm
514 514
515/**
516 * Errata workaround prior to disable MMU. Insert an ISB immediately prior
517 * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.
518 */
519 .macro pre_disable_mmu_workaround
520#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
521 isb
522#endif
523 .endm
524
515#endif /* __ASM_ASSEMBLER_H */ 525#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index ac67cfc2585a..060e3a4008ab 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -60,6 +60,9 @@ enum ftr_type {
60#define FTR_VISIBLE true /* Feature visible to the user space */ 60#define FTR_VISIBLE true /* Feature visible to the user space */
61#define FTR_HIDDEN false /* Feature is hidden from the user */ 61#define FTR_HIDDEN false /* Feature is hidden from the user */
62 62
63#define FTR_VISIBLE_IF_IS_ENABLED(config) \
64 (IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN)
65
63struct arm64_ftr_bits { 66struct arm64_ftr_bits {
64 bool sign; /* Value is signed ? */ 67 bool sign; /* Value is signed ? */
65 bool visible; 68 bool visible;
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 235e77d98261..cbf08d7cbf30 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -91,6 +91,7 @@
91#define BRCM_CPU_PART_VULCAN 0x516 91#define BRCM_CPU_PART_VULCAN 0x516
92 92
93#define QCOM_CPU_PART_FALKOR_V1 0x800 93#define QCOM_CPU_PART_FALKOR_V1 0x800
94#define QCOM_CPU_PART_FALKOR 0xC00
94 95
95#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) 96#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
96#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) 97#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
@@ -99,6 +100,7 @@
99#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) 100#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
100#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) 101#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
101#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) 102#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
103#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
102 104
103#ifndef __ASSEMBLY__ 105#ifndef __ASSEMBLY__
104 106
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 149d05fb9421..bdcc7f1c9d06 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -42,6 +42,8 @@
42#include <asm/cmpxchg.h> 42#include <asm/cmpxchg.h>
43#include <asm/fixmap.h> 43#include <asm/fixmap.h>
44#include <linux/mmdebug.h> 44#include <linux/mmdebug.h>
45#include <linux/mm_types.h>
46#include <linux/sched.h>
45 47
46extern void __pte_error(const char *file, int line, unsigned long val); 48extern void __pte_error(const char *file, int line, unsigned long val);
47extern void __pmd_error(const char *file, int line, unsigned long val); 49extern void __pmd_error(const char *file, int line, unsigned long val);
@@ -149,12 +151,20 @@ static inline pte_t pte_mkwrite(pte_t pte)
149 151
150static inline pte_t pte_mkclean(pte_t pte) 152static inline pte_t pte_mkclean(pte_t pte)
151{ 153{
152 return clear_pte_bit(pte, __pgprot(PTE_DIRTY)); 154 pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
155 pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
156
157 return pte;
153} 158}
154 159
155static inline pte_t pte_mkdirty(pte_t pte) 160static inline pte_t pte_mkdirty(pte_t pte)
156{ 161{
157 return set_pte_bit(pte, __pgprot(PTE_DIRTY)); 162 pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
163
164 if (pte_write(pte))
165 pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
166
167 return pte;
158} 168}
159 169
160static inline pte_t pte_mkold(pte_t pte) 170static inline pte_t pte_mkold(pte_t pte)
@@ -207,9 +217,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
207 } 217 }
208} 218}
209 219
210struct mm_struct;
211struct vm_area_struct;
212
213extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); 220extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
214 221
215/* 222/*
@@ -238,7 +245,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
238 * hardware updates of the pte (ptep_set_access_flags safely changes 245 * hardware updates of the pte (ptep_set_access_flags safely changes
239 * valid ptes without going through an invalid entry). 246 * valid ptes without going through an invalid entry).
240 */ 247 */
241 if (pte_valid(*ptep) && pte_valid(pte)) { 248 if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
249 (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
242 VM_WARN_ONCE(!pte_young(pte), 250 VM_WARN_ONCE(!pte_young(pte),
243 "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", 251 "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
244 __func__, pte_val(*ptep), pte_val(pte)); 252 __func__, pte_val(*ptep), pte_val(pte));
@@ -641,28 +649,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
641#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 649#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
642 650
643/* 651/*
644 * ptep_set_wrprotect - mark read-only while preserving the hardware update of 652 * ptep_set_wrprotect - mark read-only while trasferring potential hardware
645 * the Access Flag. 653 * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
646 */ 654 */
647#define __HAVE_ARCH_PTEP_SET_WRPROTECT 655#define __HAVE_ARCH_PTEP_SET_WRPROTECT
648static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) 656static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
649{ 657{
650 pte_t old_pte, pte; 658 pte_t old_pte, pte;
651 659
652 /*
653 * ptep_set_wrprotect() is only called on CoW mappings which are
654 * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE &&
655 * PTE_RDONLY) or writable and software-dirty (PTE_WRITE &&
656 * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and
657 * protection_map[]. There is no race with the hardware update of the
658 * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM)
659 * is set.
660 */
661 VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep),
662 "%s: potential race with hardware DBM", __func__);
663 pte = READ_ONCE(*ptep); 660 pte = READ_ONCE(*ptep);
664 do { 661 do {
665 old_pte = pte; 662 old_pte = pte;
663 /*
664 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
665 * clear), set the PTE_DIRTY bit.
666 */
667 if (pte_hw_dirty(pte))
668 pte = pte_mkdirty(pte);
666 pte = pte_wrprotect(pte); 669 pte = pte_wrprotect(pte);
667 pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), 670 pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
668 pte_val(old_pte), pte_val(pte)); 671 pte_val(old_pte), pte_val(pte));
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 65f42d257414..2a752cb2a0f3 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -37,6 +37,7 @@ ENTRY(__cpu_soft_restart)
37 mrs x12, sctlr_el1 37 mrs x12, sctlr_el1
38 ldr x13, =SCTLR_ELx_FLAGS 38 ldr x13, =SCTLR_ELx_FLAGS
39 bic x12, x12, x13 39 bic x12, x12, x13
40 pre_disable_mmu_workaround
40 msr sctlr_el1, x12 41 msr sctlr_el1, x12
41 isb 42 isb
42 43
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c5ba0097887f..a73a5928f09b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -145,7 +145,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
145}; 145};
146 146
147static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { 147static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
148 ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), 148 ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
149 FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
149 ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0), 150 ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
150 S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), 151 S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
151 S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), 152 S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 4e6ad355bd05..6b9736c3fb56 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -96,6 +96,7 @@ ENTRY(entry)
96 mrs x0, sctlr_el2 96 mrs x0, sctlr_el2
97 bic x0, x0, #1 << 0 // clear SCTLR.M 97 bic x0, x0, #1 << 0 // clear SCTLR.M
98 bic x0, x0, #1 << 2 // clear SCTLR.C 98 bic x0, x0, #1 << 2 // clear SCTLR.C
99 pre_disable_mmu_workaround
99 msr sctlr_el2, x0 100 msr sctlr_el2, x0
100 isb 101 isb
101 b 2f 102 b 2f
@@ -103,6 +104,7 @@ ENTRY(entry)
103 mrs x0, sctlr_el1 104 mrs x0, sctlr_el1
104 bic x0, x0, #1 << 0 // clear SCTLR.M 105 bic x0, x0, #1 << 0 // clear SCTLR.M
105 bic x0, x0, #1 << 2 // clear SCTLR.C 106 bic x0, x0, #1 << 2 // clear SCTLR.C
107 pre_disable_mmu_workaround
106 msr sctlr_el1, x0 108 msr sctlr_el1, x0
107 isb 109 isb
1082: 1102:
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 540a1e010eb5..fae81f7964b4 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1043,7 +1043,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
1043 1043
1044 local_bh_disable(); 1044 local_bh_disable();
1045 1045
1046 current->thread.fpsimd_state = *state; 1046 current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd;
1047 if (system_supports_sve() && test_thread_flag(TIF_SVE)) 1047 if (system_supports_sve() && test_thread_flag(TIF_SVE))
1048 fpsimd_to_sve(current); 1048 fpsimd_to_sve(current);
1049 1049
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 67e86a0f57ac..e3cb9fbf96b6 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -750,6 +750,7 @@ __primary_switch:
750 * to take into account by discarding the current kernel mapping and 750 * to take into account by discarding the current kernel mapping and
751 * creating a new one. 751 * creating a new one.
752 */ 752 */
753 pre_disable_mmu_workaround
753 msr sctlr_el1, x20 // disable the MMU 754 msr sctlr_el1, x20 // disable the MMU
754 isb 755 isb
755 bl __create_page_tables // recreate kernel mapping 756 bl __create_page_tables // recreate kernel mapping
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 749f81779420..74bb56f656ef 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -28,6 +28,7 @@
28#include <linux/perf_event.h> 28#include <linux/perf_event.h>
29#include <linux/ptrace.h> 29#include <linux/ptrace.h>
30#include <linux/smp.h> 30#include <linux/smp.h>
31#include <linux/uaccess.h>
31 32
32#include <asm/compat.h> 33#include <asm/compat.h>
33#include <asm/current.h> 34#include <asm/current.h>
@@ -36,7 +37,6 @@
36#include <asm/traps.h> 37#include <asm/traps.h>
37#include <asm/cputype.h> 38#include <asm/cputype.h>
38#include <asm/system_misc.h> 39#include <asm/system_misc.h>
39#include <asm/uaccess.h>
40 40
41/* Breakpoint currently in use for each BRP. */ 41/* Breakpoint currently in use for each BRP. */
42static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); 42static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index ce704a4aeadd..f407e422a720 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -45,6 +45,7 @@ ENTRY(arm64_relocate_new_kernel)
45 mrs x0, sctlr_el2 45 mrs x0, sctlr_el2
46 ldr x1, =SCTLR_ELx_FLAGS 46 ldr x1, =SCTLR_ELx_FLAGS
47 bic x0, x0, x1 47 bic x0, x0, x1
48 pre_disable_mmu_workaround
48 msr sctlr_el2, x0 49 msr sctlr_el2, x0
49 isb 50 isb
501: 511:
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 3f9615582377..870828c364c5 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -151,6 +151,7 @@ reset:
151 mrs x5, sctlr_el2 151 mrs x5, sctlr_el2
152 ldr x6, =SCTLR_ELx_FLAGS 152 ldr x6, =SCTLR_ELx_FLAGS
153 bic x5, x5, x6 // Clear SCTL_M and etc 153 bic x5, x5, x6 // Clear SCTL_M and etc
154 pre_disable_mmu_workaround
154 msr sctlr_el2, x5 155 msr sctlr_el2, x5
155 isb 156 isb
156 157
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 321c9c05dd9e..f4363d40e2cd 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
74{ 74{
75 u64 reg; 75 u64 reg;
76 76
77 /* Clear pmscr in case of early return */
78 *pmscr_el1 = 0;
79
77 /* SPE present on this CPU? */ 80 /* SPE present on this CPU? */
78 if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), 81 if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
79 ID_AA64DFR0_PMSVER_SHIFT)) 82 ID_AA64DFR0_PMSVER_SHIFT))
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index ca74a2aace42..7b60d62ac593 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -389,7 +389,7 @@ void ptdump_check_wx(void)
389 .check_wx = true, 389 .check_wx = true,
390 }; 390 };
391 391
392 walk_pgd(&st, &init_mm, 0); 392 walk_pgd(&st, &init_mm, VA_START);
393 note_page(&st, 0, 0, 0); 393 note_page(&st, 0, 0, 0);
394 if (st.wx_pages || st.uxn_pages) 394 if (st.wx_pages || st.uxn_pages)
395 pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", 395 pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 22168cd0dde7..9b7f89df49db 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -574,7 +574,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
574{ 574{
575 struct siginfo info; 575 struct siginfo info;
576 const struct fault_info *inf; 576 const struct fault_info *inf;
577 int ret = 0;
578 577
579 inf = esr_to_fault_info(esr); 578 inf = esr_to_fault_info(esr);
580 pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", 579 pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
@@ -589,7 +588,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
589 if (interrupts_enabled(regs)) 588 if (interrupts_enabled(regs))
590 nmi_enter(); 589 nmi_enter();
591 590
592 ret = ghes_notify_sea(); 591 ghes_notify_sea();
593 592
594 if (interrupts_enabled(regs)) 593 if (interrupts_enabled(regs))
595 nmi_exit(); 594 nmi_exit();
@@ -604,7 +603,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
604 info.si_addr = (void __user *)addr; 603 info.si_addr = (void __user *)addr;
605 arm64_notify_die("", regs, &info, esr); 604 arm64_notify_die("", regs, &info, esr);
606 605
607 return ret; 606 return 0;
608} 607}
609 608
610static const struct fault_info fault_info[] = { 609static const struct fault_info fault_info[] = {
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 5960bef0170d..00e7b900ca41 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -476,6 +476,8 @@ void __init arm64_memblock_init(void)
476 476
477 reserve_elfcorehdr(); 477 reserve_elfcorehdr();
478 478
479 high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
480
479 dma_contiguous_reserve(arm64_dma_phys_limit); 481 dma_contiguous_reserve(arm64_dma_phys_limit);
480 482
481 memblock_allow_resize(); 483 memblock_allow_resize();
@@ -502,7 +504,6 @@ void __init bootmem_init(void)
502 sparse_init(); 504 sparse_init();
503 zone_sizes_init(min, max); 505 zone_sizes_init(min, max);
504 506
505 high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
506 memblock_dump_all(); 507 memblock_dump_all();
507} 508}
508 509
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index 9345b44b86f0..f57118e1f6b4 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -123,8 +123,8 @@ int puts(const char *s)
123 while ((nuline = strchr(s, '\n')) != NULL) { 123 while ((nuline = strchr(s, '\n')) != NULL) {
124 if (nuline != s) 124 if (nuline != s)
125 pdc_iodc_print(s, nuline - s); 125 pdc_iodc_print(s, nuline - s);
126 pdc_iodc_print("\r\n", 2); 126 pdc_iodc_print("\r\n", 2);
127 s = nuline + 1; 127 s = nuline + 1;
128 } 128 }
129 if (*s != '\0') 129 if (*s != '\0')
130 pdc_iodc_print(s, strlen(s)); 130 pdc_iodc_print(s, strlen(s));
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index c980a02a52bc..598c8d60fa5e 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -35,7 +35,12 @@ struct thread_info {
35 35
36/* thread information allocation */ 36/* thread information allocation */
37 37
38#ifdef CONFIG_IRQSTACKS
39#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */
40#else
38#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */ 41#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */
42#endif
43
39/* Be sure to hunt all references to this down when you change the size of 44/* Be sure to hunt all references to this down when you change the size of
40 * the kernel stack */ 45 * the kernel stack */
41#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) 46#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index a4fd296c958e..f3cecf5117cf 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
878 STREG %r19,PT_SR7(%r16) 878 STREG %r19,PT_SR7(%r16)
879 879
880intr_return: 880intr_return:
881 /* NOTE: Need to enable interrupts incase we schedule. */
882 ssm PSW_SM_I, %r0
883
884 /* check for reschedule */ 881 /* check for reschedule */
885 mfctl %cr30,%r1 882 mfctl %cr30,%r1
886 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */ 883 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +904,11 @@ intr_check_sig:
907 LDREG PT_IASQ1(%r16), %r20 904 LDREG PT_IASQ1(%r16), %r20
908 cmpib,COND(=),n 0,%r20,intr_restore /* backward */ 905 cmpib,COND(=),n 0,%r20,intr_restore /* backward */
909 906
907 /* NOTE: We need to enable interrupts if we have to deliver
908 * signals. We used to do this earlier but it caused kernel
909 * stack overflows. */
910 ssm PSW_SM_I, %r0
911
910 copy %r0, %r25 /* long in_syscall = 0 */ 912 copy %r0, %r25 /* long in_syscall = 0 */
911#ifdef CONFIG_64BIT 913#ifdef CONFIG_64BIT
912 ldo -16(%r30),%r29 /* Reference param save area */ 914 ldo -16(%r30),%r29 /* Reference param save area */
@@ -958,6 +960,10 @@ intr_do_resched:
958 cmpib,COND(=) 0, %r20, intr_do_preempt 960 cmpib,COND(=) 0, %r20, intr_do_preempt
959 nop 961 nop
960 962
963 /* NOTE: We need to enable interrupts if we schedule. We used
964 * to do this earlier but it caused kernel stack overflows. */
965 ssm PSW_SM_I, %r0
966
961#ifdef CONFIG_64BIT 967#ifdef CONFIG_64BIT
962 ldo -16(%r30),%r29 /* Reference param save area */ 968 ldo -16(%r30),%r29 /* Reference param save area */
963#endif 969#endif
diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S
index e3a8e5e4d5de..8d072c44f300 100644
--- a/arch/parisc/kernel/hpmc.S
+++ b/arch/parisc/kernel/hpmc.S
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
305 305
306 306
307 __INITRODATA 307 __INITRODATA
308 .align 4
308 .export os_hpmc_size 309 .export os_hpmc_size
309os_hpmc_size: 310os_hpmc_size:
310 .word .os_hpmc_end-.os_hpmc 311 .word .os_hpmc_end-.os_hpmc
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 5a657986ebbf..143f90e2f9f3 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -15,7 +15,6 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/kallsyms.h> 16#include <linux/kallsyms.h>
17#include <linux/sort.h> 17#include <linux/sort.h>
18#include <linux/sched.h>
19 18
20#include <linux/uaccess.h> 19#include <linux/uaccess.h>
21#include <asm/assembly.h> 20#include <asm/assembly.h>
diff --git a/arch/parisc/lib/delay.c b/arch/parisc/lib/delay.c
index 7eab4bb8abe6..66e506520505 100644
--- a/arch/parisc/lib/delay.c
+++ b/arch/parisc/lib/delay.c
@@ -16,9 +16,7 @@
16#include <linux/preempt.h> 16#include <linux/preempt.h>
17#include <linux/init.h> 17#include <linux/init.h>
18 18
19#include <asm/processor.h>
20#include <asm/delay.h> 19#include <asm/delay.h>
21
22#include <asm/special_insns.h> /* for mfctl() */ 20#include <asm/special_insns.h> /* for mfctl() */
23#include <asm/processor.h> /* for boot_cpu_data */ 21#include <asm/processor.h> /* for boot_cpu_data */
24 22
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 6177d43f0ce8..e2a2b8400490 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
160#endif 160#endif
161} 161}
162 162
163static inline void arch_dup_mmap(struct mm_struct *oldmm, 163static inline int arch_dup_mmap(struct mm_struct *oldmm,
164 struct mm_struct *mm) 164 struct mm_struct *mm)
165{ 165{
166 return 0;
166} 167}
167 168
168#ifndef CONFIG_PPC_BOOK3S_64 169#ifndef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 5acb5a176dbe..72be0c32e902 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)
1403 1403
1404 printk("NIP: "REG" LR: "REG" CTR: "REG"\n", 1404 printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
1405 regs->nip, regs->link, regs->ctr); 1405 regs->nip, regs->link, regs->ctr);
1406 printk("REGS: %p TRAP: %04lx %s (%s)\n", 1406 printk("REGS: %px TRAP: %04lx %s (%s)\n",
1407 regs, regs->trap, print_tainted(), init_utsname()->release); 1407 regs, regs->trap, print_tainted(), init_utsname()->release);
1408 printk("MSR: "REG" ", regs->msr); 1408 printk("MSR: "REG" ", regs->msr);
1409 print_msr_bits(regs->msr); 1409 print_msr_bits(regs->msr);
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index bf457843e032..0d750d274c4e 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
725 725
726 /* Return the per-cpu state for state saving/migration */ 726 /* Return the per-cpu state for state saving/migration */
727 return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT | 727 return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
728 (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT; 728 (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
729 (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
729} 730}
730 731
731int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) 732int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
1558 1559
1559 /* 1560 /*
1560 * Restore P and Q. If the interrupt was pending, we 1561 * Restore P and Q. If the interrupt was pending, we
1561 * force both P and Q, which will trigger a resend. 1562 * force Q and !P, which will trigger a resend.
1562 * 1563 *
1563 * That means that a guest that had both an interrupt 1564 * That means that a guest that had both an interrupt
1564 * pending (queued) and Q set will restore with only 1565 * pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
1566 * is perfectly fine as coalescing interrupts that haven't 1567 * is perfectly fine as coalescing interrupts that haven't
1567 * been presented yet is always allowed. 1568 * been presented yet is always allowed.
1568 */ 1569 */
1569 if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) 1570 if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
1570 state->old_p = true; 1571 state->old_p = true;
1571 if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING) 1572 if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
1572 state->old_q = true; 1573 state->old_q = true;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 46d74e81aff1..d183b4801bdb 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -763,7 +763,8 @@ emit_clear:
763 func = (u8 *) __bpf_call_base + imm; 763 func = (u8 *) __bpf_call_base + imm;
764 764
765 /* Save skb pointer if we need to re-cache skb data */ 765 /* Save skb pointer if we need to re-cache skb data */
766 if (bpf_helper_changes_pkt_data(func)) 766 if ((ctx->seen & SEEN_SKB) &&
767 bpf_helper_changes_pkt_data(func))
767 PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); 768 PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
768 769
769 bpf_jit_emit_func_call(image, ctx, (u64)func); 770 bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ emit_clear:
772 PPC_MR(b2p[BPF_REG_0], 3); 773 PPC_MR(b2p[BPF_REG_0], 3);
773 774
774 /* refresh skb cache */ 775 /* refresh skb cache */
775 if (bpf_helper_changes_pkt_data(func)) { 776 if ((ctx->seen & SEEN_SKB) &&
777 bpf_helper_changes_pkt_data(func)) {
776 /* reload skb pointer to r3 */ 778 /* reload skb pointer to r3 */
777 PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); 779 PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
778 bpf_jit_emit_skb_loads(image, ctx); 780 bpf_jit_emit_skb_loads(image, ctx);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 153812966365..fce545774d50 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
410 int ret; 410 int ret;
411 __u64 target; 411 __u64 target;
412 412
413 if (is_kernel_addr(addr)) 413 if (is_kernel_addr(addr)) {
414 return branch_target((unsigned int *)addr); 414 if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
415 return 0;
416
417 return branch_target(&instr);
418 }
415 419
416 /* Userspace: need copy instruction here then translate it */ 420 /* Userspace: need copy instruction here then translate it */
417 pagefault_disable(); 421 pagefault_disable();
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 0ead3cd73caa..be4e7f84f70a 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -310,6 +310,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
310 return 0; 310 return 0;
311 311
312 /* 312 /*
313 * Check whether nest_imc is registered. We could end up here if the
314 * cpuhotplug callback registration fails. i.e, callback invokes the
315 * offline path for all successfully registered nodes. At this stage,
316 * nest_imc pmu will not be registered and we should return here.
317 *
318 * We return with a zero since this is not an offline failure. And
319 * cpuhp_setup_state() returns the actual failure reason to the caller,
320 * which in turn will call the cleanup routine.
321 */
322 if (!nest_pmus)
323 return 0;
324
325 /*
313 * Now that this cpu is one of the designated, 326 * Now that this cpu is one of the designated,
314 * find a next cpu a) which is online and b) in same chip. 327 * find a next cpu a) which is online and b) in same chip.
315 */ 328 */
@@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
1171 if (nest_pmus == 1) { 1184 if (nest_pmus == 1) {
1172 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); 1185 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
1173 kfree(nest_imc_refc); 1186 kfree(nest_imc_refc);
1187 kfree(per_nest_pmu_arr);
1174 } 1188 }
1175 1189
1176 if (nest_pmus > 0) 1190 if (nest_pmus > 0)
@@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
1195 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); 1209 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
1196 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); 1210 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
1197 kfree(pmu_ptr); 1211 kfree(pmu_ptr);
1198 kfree(per_nest_pmu_arr);
1199 return; 1212 return;
1200} 1213}
1201 1214
@@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
1309 ret = nest_pmu_cpumask_init(); 1322 ret = nest_pmu_cpumask_init();
1310 if (ret) { 1323 if (ret) {
1311 mutex_unlock(&nest_init_lock); 1324 mutex_unlock(&nest_init_lock);
1325 kfree(nest_imc_refc);
1326 kfree(per_nest_pmu_arr);
1312 goto err_free; 1327 goto err_free;
1313 } 1328 }
1314 } 1329 }
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 44cbf4c12ea1..df95102e732c 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
354} 354}
355 355
356static struct lock_class_key fsl_msi_irq_class; 356static struct lock_class_key fsl_msi_irq_class;
357static struct lock_class_key fsl_msi_irq_request_class;
357 358
358static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, 359static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
359 int offset, int irq_index) 360 int offset, int irq_index)
@@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
373 dev_err(&dev->dev, "No memory for MSI cascade data\n"); 374 dev_err(&dev->dev, "No memory for MSI cascade data\n");
374 return -ENOMEM; 375 return -ENOMEM;
375 } 376 }
376 irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class); 377 irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class,
378 &fsl_msi_irq_request_class);
377 cascade_data->index = offset; 379 cascade_data->index = offset;
378 cascade_data->msi_data = msi; 380 cascade_data->msi_data = msi;
379 cascade_data->virq = virt_msir; 381 cascade_data->virq = virt_msir;
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 773c4e039cd7..c0319cbf1eec 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -38,6 +38,25 @@
38#define smp_rmb() RISCV_FENCE(r,r) 38#define smp_rmb() RISCV_FENCE(r,r)
39#define smp_wmb() RISCV_FENCE(w,w) 39#define smp_wmb() RISCV_FENCE(w,w)
40 40
41/*
42 * This is a very specific barrier: it's currently only used in two places in
43 * the kernel, both in the scheduler. See include/linux/spinlock.h for the two
44 * orderings it guarantees, but the "critical section is RCsc" guarantee
45 * mandates a barrier on RISC-V. The sequence looks like:
46 *
47 * lr.aq lock
48 * sc lock <= LOCKED
49 * smp_mb__after_spinlock()
50 * // critical section
51 * lr lock
52 * sc.rl lock <= UNLOCKED
53 *
54 * The AQ/RL pair provides a RCpc critical section, but there's not really any
55 * way we can take advantage of that here because the ordering is only enforced
56 * on that one lock. Thus, we're just doing a full fence.
57 */
58#define smp_mb__after_spinlock() RISCV_FENCE(rw,rw)
59
41#include <asm-generic/barrier.h> 60#include <asm-generic/barrier.h>
42 61
43#endif /* __ASSEMBLY__ */ 62#endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 8fbb6749910d..cb7b0c63014e 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -38,10 +38,6 @@
38#include <asm/tlbflush.h> 38#include <asm/tlbflush.h>
39#include <asm/thread_info.h> 39#include <asm/thread_info.h>
40 40
41#ifdef CONFIG_HVC_RISCV_SBI
42#include <asm/hvc_riscv_sbi.h>
43#endif
44
45#ifdef CONFIG_DUMMY_CONSOLE 41#ifdef CONFIG_DUMMY_CONSOLE
46struct screen_info screen_info = { 42struct screen_info screen_info = {
47 .orig_video_lines = 30, 43 .orig_video_lines = 30,
@@ -212,13 +208,6 @@ static void __init setup_bootmem(void)
212 208
213void __init setup_arch(char **cmdline_p) 209void __init setup_arch(char **cmdline_p)
214{ 210{
215#if defined(CONFIG_HVC_RISCV_SBI)
216 if (likely(early_console == NULL)) {
217 early_console = &riscv_sbi_early_console_dev;
218 register_console(early_console);
219 }
220#endif
221
222#ifdef CONFIG_CMDLINE_BOOL 211#ifdef CONFIG_CMDLINE_BOOL
223#ifdef CONFIG_CMDLINE_OVERRIDE 212#ifdef CONFIG_CMDLINE_OVERRIDE
224 strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); 213 strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index a2ae936a093e..79c78668258e 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -70,7 +70,7 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
70 bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0; 70 bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0;
71 71
72 /* Check the reserved flags. */ 72 /* Check the reserved flags. */
73 if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL)) 73 if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL))
74 return -EINVAL; 74 return -EINVAL;
75 75
76 flush_icache_mm(mm, local); 76 flush_icache_mm(mm, local);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 57d7bc92e0b8..0a6b0286c32e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1264,12 +1264,6 @@ static inline pud_t pud_mkwrite(pud_t pud)
1264 return pud; 1264 return pud;
1265} 1265}
1266 1266
1267#define pud_write pud_write
1268static inline int pud_write(pud_t pud)
1269{
1270 return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
1271}
1272
1273static inline pud_t pud_mkclean(pud_t pud) 1267static inline pud_t pud_mkclean(pud_t pud)
1274{ 1268{
1275 if (pud_large(pud)) { 1269 if (pud_large(pud)) {
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index f04db3779b34..59eea9c65d3e 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
263 return retval; 263 return retval;
264 } 264 }
265 265
266 groups_sort(group_info);
266 retval = set_current_groups(group_info); 267 retval = set_current_groups(group_info);
267 put_group_info(group_info); 268 put_group_info(group_info);
268 269
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e81c16838b90..9557d8b516df 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -55,8 +55,7 @@ struct bpf_jit {
55#define SEEN_LITERAL 8 /* code uses literals */ 55#define SEEN_LITERAL 8 /* code uses literals */
56#define SEEN_FUNC 16 /* calls C functions */ 56#define SEEN_FUNC 16 /* calls C functions */
57#define SEEN_TAIL_CALL 32 /* code uses tail calls */ 57#define SEEN_TAIL_CALL 32 /* code uses tail calls */
58#define SEEN_SKB_CHANGE 64 /* code changes skb data */ 58#define SEEN_REG_AX 64 /* code uses constant blinding */
59#define SEEN_REG_AX 128 /* code uses constant blinding */
60#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) 59#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
61 60
62/* 61/*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
448 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, 447 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
449 REG_15, 152); 448 REG_15, 152);
450 } 449 }
451 if (jit->seen & SEEN_SKB) 450 if (jit->seen & SEEN_SKB) {
452 emit_load_skb_data_hlen(jit); 451 emit_load_skb_data_hlen(jit);
453 if (jit->seen & SEEN_SKB_CHANGE)
454 /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ 452 /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
455 EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, 453 EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
456 STK_OFF_SKBP); 454 STK_OFF_SKBP);
455 }
457} 456}
458 457
459/* 458/*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
983 EMIT2(0x0d00, REG_14, REG_W1); 982 EMIT2(0x0d00, REG_14, REG_W1);
984 /* lgr %b0,%r2: load return value into %b0 */ 983 /* lgr %b0,%r2: load return value into %b0 */
985 EMIT4(0xb9040000, BPF_REG_0, REG_2); 984 EMIT4(0xb9040000, BPF_REG_0, REG_2);
986 if (bpf_helper_changes_pkt_data((void *)func)) { 985 if ((jit->seen & SEEN_SKB) &&
987 jit->seen |= SEEN_SKB_CHANGE; 986 bpf_helper_changes_pkt_data((void *)func)) {
988 /* lg %b1,ST_OFF_SKBP(%r15) */ 987 /* lg %b1,ST_OFF_SKBP(%r15) */
989 EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0, 988 EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
990 REG_15, STK_OFF_SKBP); 989 REG_15, STK_OFF_SKBP);
diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S
index e5547b22cd18..0ddbbb031822 100644
--- a/arch/sparc/lib/hweight.S
+++ b/arch/sparc/lib/hweight.S
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
44 .previous 44 .previous
45 45
46ENTRY(__arch_hweight64) 46ENTRY(__arch_hweight64)
47 sethi %hi(__sw_hweight16), %g1 47 sethi %hi(__sw_hweight64), %g1
48 jmpl %g1 + %lo(__sw_hweight16), %g0 48 jmpl %g1 + %lo(__sw_hweight64), %g0
49 nop 49 nop
50ENDPROC(__arch_hweight64) 50ENDPROC(__arch_hweight64)
51EXPORT_SYMBOL(__arch_hweight64) 51EXPORT_SYMBOL(__arch_hweight64)
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index be3136f142a9..a8103a84b4ac 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
113 if (!printk_ratelimit()) 113 if (!printk_ratelimit())
114 return; 114 return;
115 115
116 printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", 116 printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
117 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 117 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
118 tsk->comm, task_pid_nr(tsk), address, 118 tsk->comm, task_pid_nr(tsk), address,
119 (void *)regs->pc, (void *)regs->u_regs[UREG_I7], 119 (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 815c03d7a765..41363f46797b 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
154 if (!printk_ratelimit()) 154 if (!printk_ratelimit())
155 return; 155 return;
156 156
157 printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", 157 printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
158 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 158 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
159 tsk->comm, task_pid_nr(tsk), address, 159 tsk->comm, task_pid_nr(tsk), address,
160 (void *)regs->tpc, (void *)regs->u_regs[UREG_I7], 160 (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 33c0f8bb0f33..5335ba3c850e 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
75 if (!(pmd_val(pmd) & _PAGE_VALID)) 75 if (!(pmd_val(pmd) & _PAGE_VALID))
76 return 0; 76 return 0;
77 77
78 if (!pmd_access_permitted(pmd, write)) 78 if (write && !pmd_write(pmd))
79 return 0; 79 return 0;
80 80
81 refs = 0; 81 refs = 0;
@@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
114 if (!(pud_val(pud) & _PAGE_VALID)) 114 if (!(pud_val(pud) & _PAGE_VALID))
115 return 0; 115 return 0;
116 116
117 if (!pud_access_permitted(pud, write)) 117 if (write && !pud_write(pud))
118 return 0; 118 return 0;
119 119
120 refs = 0; 120 refs = 0;
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 5765e7e711f7..ff5f9cb3039a 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1245 u8 *func = ((u8 *)__bpf_call_base) + imm; 1245 u8 *func = ((u8 *)__bpf_call_base) + imm;
1246 1246
1247 ctx->saw_call = true; 1247 ctx->saw_call = true;
1248 if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
1249 emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
1248 1250
1249 emit_call((u32 *)func, ctx); 1251 emit_call((u32 *)func, ctx);
1250 emit_nop(ctx); 1252 emit_nop(ctx);
1251 1253
1252 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); 1254 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
1253 1255
1254 if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind) 1256 if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
1255 load_skb_regs(ctx, bpf2sparc[BPF_REG_6]); 1257 load_skb_regs(ctx, L7);
1256 break; 1258 break;
1257 } 1259 }
1258 1260
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 50a32c33d729..73c57f614c9e 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -1,4 +1,5 @@
1generic-y += barrier.h 1generic-y += barrier.h
2generic-y += bpf_perf_event.h
2generic-y += bug.h 3generic-y += bug.h
3generic-y += clkdev.h 4generic-y += clkdev.h
4generic-y += current.h 5generic-y += current.h
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index b668e351fd6c..fca34b2177e2 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
15/* 15/*
16 * Needed since we do not use the asm-generic/mm_hooks.h: 16 * Needed since we do not use the asm-generic/mm_hooks.h:
17 */ 17 */
18static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) 18static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
19{ 19{
20 uml_setup_stubs(mm); 20 uml_setup_stubs(mm);
21 return 0;
21} 22}
22extern void arch_exit_mmap(struct mm_struct *mm); 23extern void arch_exit_mmap(struct mm_struct *mm);
23static inline void arch_unmap(struct mm_struct *mm, 24static inline void arch_unmap(struct mm_struct *mm,
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 4e6fcb32620f..428644175956 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
150 if (!printk_ratelimit()) 150 if (!printk_ratelimit())
151 return; 151 return;
152 152
153 printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x", 153 printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
154 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 154 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
155 tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi), 155 tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
156 (void *)UPT_IP(regs), (void *)UPT_SP(regs), 156 (void *)UPT_IP(regs), (void *)UPT_SP(regs),
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index 59b06b48f27d..5c205a9cb5a6 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -81,9 +81,10 @@ do { \
81 } \ 81 } \
82} while (0) 82} while (0)
83 83
84static inline void arch_dup_mmap(struct mm_struct *oldmm, 84static inline int arch_dup_mmap(struct mm_struct *oldmm,
85 struct mm_struct *mm) 85 struct mm_struct *mm)
86{ 86{
87 return 0;
87} 88}
88 89
89static inline void arch_unmap(struct mm_struct *mm, 90static inline void arch_unmap(struct mm_struct *mm,
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8eed3f94bfc7..d4fc98c50378 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -926,7 +926,8 @@ config MAXSMP
926config NR_CPUS 926config NR_CPUS
927 int "Maximum number of CPUs" if SMP && !MAXSMP 927 int "Maximum number of CPUs" if SMP && !MAXSMP
928 range 2 8 if SMP && X86_32 && !X86_BIGSMP 928 range 2 8 if SMP && X86_32 && !X86_BIGSMP
929 range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK 929 range 2 64 if SMP && X86_32 && X86_BIGSMP
930 range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
930 range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64 931 range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
931 default "1" if !SMP 932 default "1" if !SMP
932 default "8192" if MAXSMP 933 default "8192" if MAXSMP
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 6293a8768a91..672441c008c7 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER
400config UNWINDER_GUESS 400config UNWINDER_GUESS
401 bool "Guess unwinder" 401 bool "Guess unwinder"
402 depends on EXPERT 402 depends on EXPERT
403 depends on !STACKDEPOT
403 ---help--- 404 ---help---
404 This option enables the "guess" unwinder for unwinding kernel stack 405 This option enables the "guess" unwinder for unwinding kernel stack
405 traces. It scans the stack and reports every kernel text address it 406 traces. It scans the stack and reports every kernel text address it
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 1e9c322e973a..f25e1530e064 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
80ifdef CONFIG_X86_64 80ifdef CONFIG_X86_64
81 vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o 81 vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
82 vmlinux-objs-y += $(obj)/mem_encrypt.o 82 vmlinux-objs-y += $(obj)/mem_encrypt.o
83 vmlinux-objs-y += $(obj)/pgtable_64.o
83endif 84endif
84 85
85$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone 86$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 20919b4f3133..fc313e29fe2c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -305,10 +305,18 @@ ENTRY(startup_64)
305 leaq boot_stack_end(%rbx), %rsp 305 leaq boot_stack_end(%rbx), %rsp
306 306
307#ifdef CONFIG_X86_5LEVEL 307#ifdef CONFIG_X86_5LEVEL
308 /* Check if 5-level paging has already enabled */ 308 /*
309 movq %cr4, %rax 309 * Check if we need to enable 5-level paging.
310 testl $X86_CR4_LA57, %eax 310 * RSI holds real mode data and need to be preserved across
311 jnz lvl5 311 * a function call.
312 */
313 pushq %rsi
314 call l5_paging_required
315 popq %rsi
316
317 /* If l5_paging_required() returned zero, we're done here. */
318 cmpq $0, %rax
319 je lvl5
312 320
313 /* 321 /*
314 * At this point we are in long mode with 4-level paging enabled, 322 * At this point we are in long mode with 4-level paging enabled,
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b50c42455e25..98761a1576ce 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -169,6 +169,16 @@ void __puthex(unsigned long value)
169 } 169 }
170} 170}
171 171
172static bool l5_supported(void)
173{
174 /* Check if leaf 7 is supported. */
175 if (native_cpuid_eax(0) < 7)
176 return 0;
177
178 /* Check if la57 is supported. */
179 return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
180}
181
172#if CONFIG_X86_NEED_RELOCS 182#if CONFIG_X86_NEED_RELOCS
173static void handle_relocations(void *output, unsigned long output_len, 183static void handle_relocations(void *output, unsigned long output_len,
174 unsigned long virt_addr) 184 unsigned long virt_addr)
@@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
362 console_init(); 372 console_init();
363 debug_putstr("early console in extract_kernel\n"); 373 debug_putstr("early console in extract_kernel\n");
364 374
375 if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
376 error("This linux kernel as configured requires 5-level paging\n"
377 "This CPU does not support the required 'cr4.la57' feature\n"
378 "Unable to boot - please use a kernel appropriate for your CPU\n");
379 }
380
365 free_mem_ptr = heap; /* Heap */ 381 free_mem_ptr = heap; /* Heap */
366 free_mem_end_ptr = heap + BOOT_HEAP_SIZE; 382 free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
367 383
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index d5364ca2e3f9..b5e5e02f8cde 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -23,6 +23,9 @@
23 */ 23 */
24#undef CONFIG_AMD_MEM_ENCRYPT 24#undef CONFIG_AMD_MEM_ENCRYPT
25 25
26/* No PAGE_TABLE_ISOLATION support needed either: */
27#undef CONFIG_PAGE_TABLE_ISOLATION
28
26#include "misc.h" 29#include "misc.h"
27 30
28/* These actually do the work of building the kernel identity maps. */ 31/* These actually do the work of building the kernel identity maps. */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
new file mode 100644
index 000000000000..b4469a37e9a1
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -0,0 +1,28 @@
1#include <asm/processor.h>
2
3/*
4 * __force_order is used by special_insns.h asm code to force instruction
5 * serialization.
6 *
7 * It is not referenced from the code, but GCC < 5 with -fPIE would fail
8 * due to an undefined symbol. Define it to make these ancient GCCs work.
9 */
10unsigned long __force_order;
11
12int l5_paging_required(void)
13{
14 /* Check if leaf 7 is supported. */
15
16 if (native_cpuid_eax(0) < 7)
17 return 0;
18
19 /* Check if la57 is supported. */
20 if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
21 return 0;
22
23 /* Check if 5-level paging has already been enabled. */
24 if (native_read_cr4() & X86_CR4_LA57)
25 return 0;
26
27 return 1;
28}
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
index 49f4970f693b..6a10d52a4145 100644
--- a/arch/x86/boot/genimage.sh
+++ b/arch/x86/boot/genimage.sh
@@ -44,9 +44,9 @@ FDINITRD=$6
44 44
45# Make sure the files actually exist 45# Make sure the files actually exist
46verify "$FBZIMAGE" 46verify "$FBZIMAGE"
47verify "$MTOOLSRC"
48 47
49genbzdisk() { 48genbzdisk() {
49 verify "$MTOOLSRC"
50 mformat a: 50 mformat a:
51 syslinux $FIMAGE 51 syslinux $FIMAGE
52 echo "$KCMDLINE" | mcopy - a:syslinux.cfg 52 echo "$KCMDLINE" | mcopy - a:syslinux.cfg
@@ -57,6 +57,7 @@ genbzdisk() {
57} 57}
58 58
59genfdimage144() { 59genfdimage144() {
60 verify "$MTOOLSRC"
60 dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null 61 dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
61 mformat v: 62 mformat v:
62 syslinux $FIMAGE 63 syslinux $FIMAGE
@@ -68,6 +69,7 @@ genfdimage144() {
68} 69}
69 70
70genfdimage288() { 71genfdimage288() {
72 verify "$MTOOLSRC"
71 dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null 73 dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
72 mformat w: 74 mformat w:
73 syslinux $FIMAGE 75 syslinux $FIMAGE
@@ -78,39 +80,43 @@ genfdimage288() {
78 mcopy $FBZIMAGE w:linux 80 mcopy $FBZIMAGE w:linux
79} 81}
80 82
81genisoimage() { 83geniso() {
82 tmp_dir=`dirname $FIMAGE`/isoimage 84 tmp_dir=`dirname $FIMAGE`/isoimage
83 rm -rf $tmp_dir 85 rm -rf $tmp_dir
84 mkdir $tmp_dir 86 mkdir $tmp_dir
85 for i in lib lib64 share end ; do 87 for i in lib lib64 share ; do
86 for j in syslinux ISOLINUX ; do 88 for j in syslinux ISOLINUX ; do
87 if [ -f /usr/$i/$j/isolinux.bin ] ; then 89 if [ -f /usr/$i/$j/isolinux.bin ] ; then
88 isolinux=/usr/$i/$j/isolinux.bin 90 isolinux=/usr/$i/$j/isolinux.bin
89 cp $isolinux $tmp_dir
90 fi 91 fi
91 done 92 done
92 for j in syslinux syslinux/modules/bios ; do 93 for j in syslinux syslinux/modules/bios ; do
93 if [ -f /usr/$i/$j/ldlinux.c32 ]; then 94 if [ -f /usr/$i/$j/ldlinux.c32 ]; then
94 ldlinux=/usr/$i/$j/ldlinux.c32 95 ldlinux=/usr/$i/$j/ldlinux.c32
95 cp $ldlinux $tmp_dir
96 fi 96 fi
97 done 97 done
98 if [ -n "$isolinux" -a -n "$ldlinux" ] ; then 98 if [ -n "$isolinux" -a -n "$ldlinux" ] ; then
99 break 99 break
100 fi 100 fi
101 if [ $i = end -a -z "$isolinux" ] ; then
102 echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
103 exit 1
104 fi
105 done 101 done
102 if [ -z "$isolinux" ] ; then
103 echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
104 exit 1
105 fi
106 if [ -z "$ldlinux" ] ; then
107 echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.'
108 exit 1
109 fi
110 cp $isolinux $tmp_dir
111 cp $ldlinux $tmp_dir
106 cp $FBZIMAGE $tmp_dir/linux 112 cp $FBZIMAGE $tmp_dir/linux
107 echo "$KCMDLINE" > $tmp_dir/isolinux.cfg 113 echo "$KCMDLINE" > $tmp_dir/isolinux.cfg
108 if [ -f "$FDINITRD" ] ; then 114 if [ -f "$FDINITRD" ] ; then
109 cp "$FDINITRD" $tmp_dir/initrd.img 115 cp "$FDINITRD" $tmp_dir/initrd.img
110 fi 116 fi
111 mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \ 117 genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \
112 -c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \ 118 -b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \
113 $tmp_dir 119 -boot-info-table $tmp_dir
114 isohybrid $FIMAGE 2>/dev/null || true 120 isohybrid $FIMAGE 2>/dev/null || true
115 rm -rf $tmp_dir 121 rm -rf $tmp_dir
116} 122}
@@ -119,6 +125,6 @@ case $1 in
119 bzdisk) genbzdisk;; 125 bzdisk) genbzdisk;;
120 fdimage144) genfdimage144;; 126 fdimage144) genfdimage144;;
121 fdimage288) genfdimage288;; 127 fdimage288) genfdimage288;;
122 isoimage) genisoimage;; 128 isoimage) geniso;;
123 *) echo 'Unknown image format'; exit 1; 129 *) echo 'Unknown image format'; exit 1;
124esac 130esac
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index 399a29d067d6..cb91a64a99e7 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc,
59 59
60 salsa20_ivsetup(ctx, walk.iv); 60 salsa20_ivsetup(ctx, walk.iv);
61 61
62 if (likely(walk.nbytes == nbytes))
63 {
64 salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
65 walk.dst.virt.addr, nbytes);
66 return blkcipher_walk_done(desc, &walk, 0);
67 }
68
69 while (walk.nbytes >= 64) { 62 while (walk.nbytes >= 64) {
70 salsa20_encrypt_bytes(ctx, walk.src.virt.addr, 63 salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
71 walk.dst.virt.addr, 64 walk.dst.virt.addr,
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 3fd8bc560fae..45a63e00a6af 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,6 +1,11 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1/* SPDX-License-Identifier: GPL-2.0 */
2#include <linux/jump_label.h> 2#include <linux/jump_label.h>
3#include <asm/unwind_hints.h> 3#include <asm/unwind_hints.h>
4#include <asm/cpufeatures.h>
5#include <asm/page_types.h>
6#include <asm/percpu.h>
7#include <asm/asm-offsets.h>
8#include <asm/processor-flags.h>
4 9
5/* 10/*
6 11
@@ -187,6 +192,146 @@ For 32-bit we have the following conventions - kernel is built with
187#endif 192#endif
188.endm 193.endm
189 194
195#ifdef CONFIG_PAGE_TABLE_ISOLATION
196
197/*
198 * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
199 * halves:
200 */
201#define PTI_SWITCH_PGTABLES_MASK (1<<PAGE_SHIFT)
202#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
203
204.macro SET_NOFLUSH_BIT reg:req
205 bts $X86_CR3_PCID_NOFLUSH_BIT, \reg
206.endm
207
208.macro ADJUST_KERNEL_CR3 reg:req
209 ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
210 /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
211 andq $(~PTI_SWITCH_MASK), \reg
212.endm
213
214.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
215 ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
216 mov %cr3, \scratch_reg
217 ADJUST_KERNEL_CR3 \scratch_reg
218 mov \scratch_reg, %cr3
219.Lend_\@:
220.endm
221
222#define THIS_CPU_user_pcid_flush_mask \
223 PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
224
225.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
226 ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
227 mov %cr3, \scratch_reg
228
229 ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
230
231 /*
232 * Test if the ASID needs a flush.
233 */
234 movq \scratch_reg, \scratch_reg2
235 andq $(0x7FF), \scratch_reg /* mask ASID */
236 bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
237 jnc .Lnoflush_\@
238
239 /* Flush needed, clear the bit */
240 btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
241 movq \scratch_reg2, \scratch_reg
242 jmp .Lwrcr3_\@
243
244.Lnoflush_\@:
245 movq \scratch_reg2, \scratch_reg
246 SET_NOFLUSH_BIT \scratch_reg
247
248.Lwrcr3_\@:
249 /* Flip the PGD and ASID to the user version */
250 orq $(PTI_SWITCH_MASK), \scratch_reg
251 mov \scratch_reg, %cr3
252.Lend_\@:
253.endm
254
255.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
256 pushq %rax
257 SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
258 popq %rax
259.endm
260
261.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
262 ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
263 movq %cr3, \scratch_reg
264 movq \scratch_reg, \save_reg
265 /*
266 * Is the "switch mask" all zero? That means that both of
267 * these are zero:
268 *
269 * 1. The user/kernel PCID bit, and
270 * 2. The user/kernel "bit" that points CR3 to the
271 * bottom half of the 8k PGD
272 *
273 * That indicates a kernel CR3 value, not a user CR3.
274 */
275 testq $(PTI_SWITCH_MASK), \scratch_reg
276 jz .Ldone_\@
277
278 ADJUST_KERNEL_CR3 \scratch_reg
279 movq \scratch_reg, %cr3
280
281.Ldone_\@:
282.endm
283
284.macro RESTORE_CR3 scratch_reg:req save_reg:req
285 ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
286
287 ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
288
289 /*
290 * KERNEL pages can always resume with NOFLUSH as we do
291 * explicit flushes.
292 */
293 bt $X86_CR3_PTI_SWITCH_BIT, \save_reg
294 jnc .Lnoflush_\@
295
296 /*
297 * Check if there's a pending flush for the user ASID we're
298 * about to set.
299 */
300 movq \save_reg, \scratch_reg
301 andq $(0x7FF), \scratch_reg
302 bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
303 jnc .Lnoflush_\@
304
305 btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
306 jmp .Lwrcr3_\@
307
308.Lnoflush_\@:
309 SET_NOFLUSH_BIT \save_reg
310
311.Lwrcr3_\@:
312 /*
313 * The CR3 write could be avoided when not changing its value,
314 * but would require a CR3 read *and* a scratch register.
315 */
316 movq \save_reg, %cr3
317.Lend_\@:
318.endm
319
320#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
321
322.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
323.endm
324.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
325.endm
326.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
327.endm
328.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
329.endm
330.macro RESTORE_CR3 scratch_reg:req save_reg:req
331.endm
332
333#endif
334
190#endif /* CONFIG_X86_64 */ 335#endif /* CONFIG_X86_64 */
191 336
192/* 337/*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4838037f97f6..ace8f321a5a1 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -941,9 +941,10 @@ ENTRY(debug)
941 movl %esp, %eax # pt_regs pointer 941 movl %esp, %eax # pt_regs pointer
942 942
943 /* Are we currently on the SYSENTER stack? */ 943 /* Are we currently on the SYSENTER stack? */
944 PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) 944 movl PER_CPU_VAR(cpu_entry_area), %ecx
945 subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ 945 addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
946 cmpl $SIZEOF_SYSENTER_stack, %ecx 946 subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
947 cmpl $SIZEOF_entry_stack, %ecx
947 jb .Ldebug_from_sysenter_stack 948 jb .Ldebug_from_sysenter_stack
948 949
949 TRACE_IRQS_OFF 950 TRACE_IRQS_OFF
@@ -984,9 +985,10 @@ ENTRY(nmi)
984 movl %esp, %eax # pt_regs pointer 985 movl %esp, %eax # pt_regs pointer
985 986
986 /* Are we currently on the SYSENTER stack? */ 987 /* Are we currently on the SYSENTER stack? */
987 PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) 988 movl PER_CPU_VAR(cpu_entry_area), %ecx
988 subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ 989 addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
989 cmpl $SIZEOF_SYSENTER_stack, %ecx 990 subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
991 cmpl $SIZEOF_entry_stack, %ecx
990 jb .Lnmi_from_sysenter_stack 992 jb .Lnmi_from_sysenter_stack
991 993
992 /* Not on SYSENTER stack. */ 994 /* Not on SYSENTER stack. */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f81d50d7ceac..f048e384ff54 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -23,7 +23,6 @@
23#include <asm/segment.h> 23#include <asm/segment.h>
24#include <asm/cache.h> 24#include <asm/cache.h>
25#include <asm/errno.h> 25#include <asm/errno.h>
26#include "calling.h"
27#include <asm/asm-offsets.h> 26#include <asm/asm-offsets.h>
28#include <asm/msr.h> 27#include <asm/msr.h>
29#include <asm/unistd.h> 28#include <asm/unistd.h>
@@ -40,6 +39,8 @@
40#include <asm/frame.h> 39#include <asm/frame.h>
41#include <linux/err.h> 40#include <linux/err.h>
42 41
42#include "calling.h"
43
43.code64 44.code64
44.section .entry.text, "ax" 45.section .entry.text, "ax"
45 46
@@ -140,6 +141,67 @@ END(native_usergs_sysret64)
140 * with them due to bugs in both AMD and Intel CPUs. 141 * with them due to bugs in both AMD and Intel CPUs.
141 */ 142 */
142 143
144 .pushsection .entry_trampoline, "ax"
145
146/*
147 * The code in here gets remapped into cpu_entry_area's trampoline. This means
148 * that the assembler and linker have the wrong idea as to where this code
149 * lives (and, in fact, it's mapped more than once, so it's not even at a
150 * fixed address). So we can't reference any symbols outside the entry
151 * trampoline and expect it to work.
152 *
153 * Instead, we carefully abuse %rip-relative addressing.
154 * _entry_trampoline(%rip) refers to the start of the remapped) entry
155 * trampoline. We can thus find cpu_entry_area with this macro:
156 */
157
158#define CPU_ENTRY_AREA \
159 _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
160
161/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
162#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \
163 SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
164
165ENTRY(entry_SYSCALL_64_trampoline)
166 UNWIND_HINT_EMPTY
167 swapgs
168
169 /* Stash the user RSP. */
170 movq %rsp, RSP_SCRATCH
171
172 /* Note: using %rsp as a scratch reg. */
173 SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
174
175 /* Load the top of the task stack into RSP */
176 movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
177
178 /* Start building the simulated IRET frame. */
179 pushq $__USER_DS /* pt_regs->ss */
180 pushq RSP_SCRATCH /* pt_regs->sp */
181 pushq %r11 /* pt_regs->flags */
182 pushq $__USER_CS /* pt_regs->cs */
183 pushq %rcx /* pt_regs->ip */
184
185 /*
186 * x86 lacks a near absolute jump, and we can't jump to the real
187 * entry text with a relative jump. We could push the target
188 * address and then use retq, but this destroys the pipeline on
189 * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
190 * spill RDI and restore it in a second-stage trampoline.
191 */
192 pushq %rdi
193 movq $entry_SYSCALL_64_stage2, %rdi
194 jmp *%rdi
195END(entry_SYSCALL_64_trampoline)
196
197 .popsection
198
199ENTRY(entry_SYSCALL_64_stage2)
200 UNWIND_HINT_EMPTY
201 popq %rdi
202 jmp entry_SYSCALL_64_after_hwframe
203END(entry_SYSCALL_64_stage2)
204
143ENTRY(entry_SYSCALL_64) 205ENTRY(entry_SYSCALL_64)
144 UNWIND_HINT_EMPTY 206 UNWIND_HINT_EMPTY
145 /* 207 /*
@@ -149,6 +211,10 @@ ENTRY(entry_SYSCALL_64)
149 */ 211 */
150 212
151 swapgs 213 swapgs
214 /*
215 * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
216 * is not required to switch CR3.
217 */
152 movq %rsp, PER_CPU_VAR(rsp_scratch) 218 movq %rsp, PER_CPU_VAR(rsp_scratch)
153 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 219 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
154 220
@@ -330,8 +396,25 @@ syscall_return_via_sysret:
330 popq %rsi /* skip rcx */ 396 popq %rsi /* skip rcx */
331 popq %rdx 397 popq %rdx
332 popq %rsi 398 popq %rsi
399
400 /*
401 * Now all regs are restored except RSP and RDI.
402 * Save old stack pointer and switch to trampoline stack.
403 */
404 movq %rsp, %rdi
405 movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
406
407 pushq RSP-RDI(%rdi) /* RSP */
408 pushq (%rdi) /* RDI */
409
410 /*
411 * We are on the trampoline stack. All regs except RDI are live.
412 * We can do future final exit work right here.
413 */
414 SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
415
333 popq %rdi 416 popq %rdi
334 movq RSP-ORIG_RAX(%rsp), %rsp 417 popq %rsp
335 USERGS_SYSRET64 418 USERGS_SYSRET64
336END(entry_SYSCALL_64) 419END(entry_SYSCALL_64)
337 420
@@ -466,12 +549,13 @@ END(irq_entries_start)
466 549
467.macro DEBUG_ENTRY_ASSERT_IRQS_OFF 550.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
468#ifdef CONFIG_DEBUG_ENTRY 551#ifdef CONFIG_DEBUG_ENTRY
469 pushfq 552 pushq %rax
470 testl $X86_EFLAGS_IF, (%rsp) 553 SAVE_FLAGS(CLBR_RAX)
554 testl $X86_EFLAGS_IF, %eax
471 jz .Lokay_\@ 555 jz .Lokay_\@
472 ud2 556 ud2
473.Lokay_\@: 557.Lokay_\@:
474 addq $8, %rsp 558 popq %rax
475#endif 559#endif
476.endm 560.endm
477 561
@@ -563,6 +647,13 @@ END(irq_entries_start)
563/* 0(%rsp): ~(interrupt number) */ 647/* 0(%rsp): ~(interrupt number) */
564 .macro interrupt func 648 .macro interrupt func
565 cld 649 cld
650
651 testb $3, CS-ORIG_RAX(%rsp)
652 jz 1f
653 SWAPGS
654 call switch_to_thread_stack
6551:
656
566 ALLOC_PT_GPREGS_ON_STACK 657 ALLOC_PT_GPREGS_ON_STACK
567 SAVE_C_REGS 658 SAVE_C_REGS
568 SAVE_EXTRA_REGS 659 SAVE_EXTRA_REGS
@@ -572,12 +663,8 @@ END(irq_entries_start)
572 jz 1f 663 jz 1f
573 664
574 /* 665 /*
575 * IRQ from user mode. Switch to kernel gsbase and inform context 666 * IRQ from user mode.
576 * tracking that we're in kernel mode. 667 *
577 */
578 SWAPGS
579
580 /*
581 * We need to tell lockdep that IRQs are off. We can't do this until 668 * We need to tell lockdep that IRQs are off. We can't do this until
582 * we fix gsbase, and we should do it before enter_from_user_mode 669 * we fix gsbase, and we should do it before enter_from_user_mode
583 * (which can take locks). Since TRACE_IRQS_OFF idempotent, 670 * (which can take locks). Since TRACE_IRQS_OFF idempotent,
@@ -630,10 +717,43 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
630 ud2 717 ud2
6311: 7181:
632#endif 719#endif
633 SWAPGS
634 POP_EXTRA_REGS 720 POP_EXTRA_REGS
635 POP_C_REGS 721 popq %r11
636 addq $8, %rsp /* skip regs->orig_ax */ 722 popq %r10
723 popq %r9
724 popq %r8
725 popq %rax
726 popq %rcx
727 popq %rdx
728 popq %rsi
729
730 /*
731 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
732 * Save old stack pointer and switch to trampoline stack.
733 */
734 movq %rsp, %rdi
735 movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
736
737 /* Copy the IRET frame to the trampoline stack. */
738 pushq 6*8(%rdi) /* SS */
739 pushq 5*8(%rdi) /* RSP */
740 pushq 4*8(%rdi) /* EFLAGS */
741 pushq 3*8(%rdi) /* CS */
742 pushq 2*8(%rdi) /* RIP */
743
744 /* Push user RDI on the trampoline stack. */
745 pushq (%rdi)
746
747 /*
748 * We are on the trampoline stack. All regs except RDI are live.
749 * We can do future final exit work right here.
750 */
751
752 SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
753
754 /* Restore RDI. */
755 popq %rdi
756 SWAPGS
637 INTERRUPT_RETURN 757 INTERRUPT_RETURN
638 758
639 759
@@ -713,7 +833,9 @@ native_irq_return_ldt:
713 */ 833 */
714 834
715 pushq %rdi /* Stash user RDI */ 835 pushq %rdi /* Stash user RDI */
716 SWAPGS 836 SWAPGS /* to kernel GS */
837 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
838
717 movq PER_CPU_VAR(espfix_waddr), %rdi 839 movq PER_CPU_VAR(espfix_waddr), %rdi
718 movq %rax, (0*8)(%rdi) /* user RAX */ 840 movq %rax, (0*8)(%rdi) /* user RAX */
719 movq (1*8)(%rsp), %rax /* user RIP */ 841 movq (1*8)(%rsp), %rax /* user RIP */
@@ -729,7 +851,6 @@ native_irq_return_ldt:
729 /* Now RAX == RSP. */ 851 /* Now RAX == RSP. */
730 852
731 andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */ 853 andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */
732 popq %rdi /* Restore user RDI */
733 854
734 /* 855 /*
735 * espfix_stack[31:16] == 0. The page tables are set up such that 856 * espfix_stack[31:16] == 0. The page tables are set up such that
@@ -740,7 +861,11 @@ native_irq_return_ldt:
740 * still points to an RO alias of the ESPFIX stack. 861 * still points to an RO alias of the ESPFIX stack.
741 */ 862 */
742 orq PER_CPU_VAR(espfix_stack), %rax 863 orq PER_CPU_VAR(espfix_stack), %rax
743 SWAPGS 864
865 SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
866 SWAPGS /* to user GS */
867 popq %rdi /* Restore user RDI */
868
744 movq %rax, %rsp 869 movq %rax, %rsp
745 UNWIND_HINT_IRET_REGS offset=8 870 UNWIND_HINT_IRET_REGS offset=8
746 871
@@ -829,7 +954,35 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
829/* 954/*
830 * Exception entry points. 955 * Exception entry points.
831 */ 956 */
832#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) 957#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
958
959/*
960 * Switch to the thread stack. This is called with the IRET frame and
961 * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
962 * space has not been allocated for them.)
963 */
964ENTRY(switch_to_thread_stack)
965 UNWIND_HINT_FUNC
966
967 pushq %rdi
968 /* Need to switch before accessing the thread stack. */
969 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
970 movq %rsp, %rdi
971 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
972 UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
973
974 pushq 7*8(%rdi) /* regs->ss */
975 pushq 6*8(%rdi) /* regs->rsp */
976 pushq 5*8(%rdi) /* regs->eflags */
977 pushq 4*8(%rdi) /* regs->cs */
978 pushq 3*8(%rdi) /* regs->ip */
979 pushq 2*8(%rdi) /* regs->orig_ax */
980 pushq 8(%rdi) /* return address */
981 UNWIND_HINT_FUNC
982
983 movq (%rdi), %rdi
984 ret
985END(switch_to_thread_stack)
833 986
834.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 987.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
835ENTRY(\sym) 988ENTRY(\sym)
@@ -848,11 +1001,12 @@ ENTRY(\sym)
848 1001
849 ALLOC_PT_GPREGS_ON_STACK 1002 ALLOC_PT_GPREGS_ON_STACK
850 1003
851 .if \paranoid 1004 .if \paranoid < 2
852 .if \paranoid == 1
853 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ 1005 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
854 jnz 1f 1006 jnz .Lfrom_usermode_switch_stack_\@
855 .endif 1007 .endif
1008
1009 .if \paranoid
856 call paranoid_entry 1010 call paranoid_entry
857 .else 1011 .else
858 call error_entry 1012 call error_entry
@@ -894,20 +1048,15 @@ ENTRY(\sym)
894 jmp error_exit 1048 jmp error_exit
895 .endif 1049 .endif
896 1050
897 .if \paranoid == 1 1051 .if \paranoid < 2
898 /* 1052 /*
899 * Paranoid entry from userspace. Switch stacks and treat it 1053 * Entry from userspace. Switch stacks and treat it
900 * as a normal entry. This means that paranoid handlers 1054 * as a normal entry. This means that paranoid handlers
901 * run in real process context if user_mode(regs). 1055 * run in real process context if user_mode(regs).
902 */ 1056 */
9031: 1057.Lfrom_usermode_switch_stack_\@:
904 call error_entry 1058 call error_entry
905 1059
906
907 movq %rsp, %rdi /* pt_regs pointer */
908 call sync_regs
909 movq %rax, %rsp /* switch stack */
910
911 movq %rsp, %rdi /* pt_regs pointer */ 1060 movq %rsp, %rdi /* pt_regs pointer */
912 1061
913 .if \has_error_code 1062 .if \has_error_code
@@ -1119,7 +1268,11 @@ ENTRY(paranoid_entry)
1119 js 1f /* negative -> in kernel */ 1268 js 1f /* negative -> in kernel */
1120 SWAPGS 1269 SWAPGS
1121 xorl %ebx, %ebx 1270 xorl %ebx, %ebx
11221: ret 1271
12721:
1273 SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
1274
1275 ret
1123END(paranoid_entry) 1276END(paranoid_entry)
1124 1277
1125/* 1278/*
@@ -1141,6 +1294,7 @@ ENTRY(paranoid_exit)
1141 testl %ebx, %ebx /* swapgs needed? */ 1294 testl %ebx, %ebx /* swapgs needed? */
1142 jnz .Lparanoid_exit_no_swapgs 1295 jnz .Lparanoid_exit_no_swapgs
1143 TRACE_IRQS_IRETQ 1296 TRACE_IRQS_IRETQ
1297 RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
1144 SWAPGS_UNSAFE_STACK 1298 SWAPGS_UNSAFE_STACK
1145 jmp .Lparanoid_exit_restore 1299 jmp .Lparanoid_exit_restore
1146.Lparanoid_exit_no_swapgs: 1300.Lparanoid_exit_no_swapgs:
@@ -1168,8 +1322,18 @@ ENTRY(error_entry)
1168 * from user mode due to an IRET fault. 1322 * from user mode due to an IRET fault.
1169 */ 1323 */
1170 SWAPGS 1324 SWAPGS
1325 /* We have user CR3. Change to kernel CR3. */
1326 SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
1171 1327
1172.Lerror_entry_from_usermode_after_swapgs: 1328.Lerror_entry_from_usermode_after_swapgs:
1329 /* Put us onto the real thread stack. */
1330 popq %r12 /* save return addr in %12 */
1331 movq %rsp, %rdi /* arg0 = pt_regs pointer */
1332 call sync_regs
1333 movq %rax, %rsp /* switch stack */
1334 ENCODE_FRAME_POINTER
1335 pushq %r12
1336
1173 /* 1337 /*
1174 * We need to tell lockdep that IRQs are off. We can't do this until 1338 * We need to tell lockdep that IRQs are off. We can't do this until
1175 * we fix gsbase, and we should do it before enter_from_user_mode 1339 * we fix gsbase, and we should do it before enter_from_user_mode
@@ -1206,6 +1370,7 @@ ENTRY(error_entry)
1206 * .Lgs_change's error handler with kernel gsbase. 1370 * .Lgs_change's error handler with kernel gsbase.
1207 */ 1371 */
1208 SWAPGS 1372 SWAPGS
1373 SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
1209 jmp .Lerror_entry_done 1374 jmp .Lerror_entry_done
1210 1375
1211.Lbstep_iret: 1376.Lbstep_iret:
@@ -1215,10 +1380,11 @@ ENTRY(error_entry)
1215 1380
1216.Lerror_bad_iret: 1381.Lerror_bad_iret:
1217 /* 1382 /*
1218 * We came from an IRET to user mode, so we have user gsbase. 1383 * We came from an IRET to user mode, so we have user
1219 * Switch to kernel gsbase: 1384 * gsbase and CR3. Switch to kernel gsbase and CR3:
1220 */ 1385 */
1221 SWAPGS 1386 SWAPGS
1387 SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
1222 1388
1223 /* 1389 /*
1224 * Pretend that the exception came from user mode: set up pt_regs 1390 * Pretend that the exception came from user mode: set up pt_regs
@@ -1250,6 +1416,10 @@ END(error_exit)
1250/* 1416/*
1251 * Runs on exception stack. Xen PV does not go through this path at all, 1417 * Runs on exception stack. Xen PV does not go through this path at all,
1252 * so we can use real assembly here. 1418 * so we can use real assembly here.
1419 *
1420 * Registers:
1421 * %r14: Used to save/restore the CR3 of the interrupted context
1422 * when PAGE_TABLE_ISOLATION is in use. Do not clobber.
1253 */ 1423 */
1254ENTRY(nmi) 1424ENTRY(nmi)
1255 UNWIND_HINT_IRET_REGS 1425 UNWIND_HINT_IRET_REGS
@@ -1313,6 +1483,7 @@ ENTRY(nmi)
1313 1483
1314 swapgs 1484 swapgs
1315 cld 1485 cld
1486 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
1316 movq %rsp, %rdx 1487 movq %rsp, %rdx
1317 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 1488 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
1318 UNWIND_HINT_IRET_REGS base=%rdx offset=8 1489 UNWIND_HINT_IRET_REGS base=%rdx offset=8
@@ -1565,6 +1736,8 @@ end_repeat_nmi:
1565 movq $-1, %rsi 1736 movq $-1, %rsi
1566 call do_nmi 1737 call do_nmi
1567 1738
1739 RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
1740
1568 testl %ebx, %ebx /* swapgs needed? */ 1741 testl %ebx, %ebx /* swapgs needed? */
1569 jnz nmi_restore 1742 jnz nmi_restore
1570nmi_swapgs: 1743nmi_swapgs:
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 568e130d932c..40f17009ec20 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -48,7 +48,11 @@
48 */ 48 */
49ENTRY(entry_SYSENTER_compat) 49ENTRY(entry_SYSENTER_compat)
50 /* Interrupts are off on entry. */ 50 /* Interrupts are off on entry. */
51 SWAPGS_UNSAFE_STACK 51 SWAPGS
52
53 /* We are about to clobber %rsp anyway, clobbering here is OK */
54 SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
55
52 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 56 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
53 57
54 /* 58 /*
@@ -216,6 +220,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
216 pushq $0 /* pt_regs->r15 = 0 */ 220 pushq $0 /* pt_regs->r15 = 0 */
217 221
218 /* 222 /*
223 * We just saved %rdi so it is safe to clobber. It is not
224 * preserved during the C calls inside TRACE_IRQS_OFF anyway.
225 */
226 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
227
228 /*
219 * User mode is traced as though IRQs are on, and SYSENTER 229 * User mode is traced as though IRQs are on, and SYSENTER
220 * turned them off. 230 * turned them off.
221 */ 231 */
@@ -256,10 +266,22 @@ sysret32_from_system_call:
256 * when the system call started, which is already known to user 266 * when the system call started, which is already known to user
257 * code. We zero R8-R10 to avoid info leaks. 267 * code. We zero R8-R10 to avoid info leaks.
258 */ 268 */
269 movq RSP-ORIG_RAX(%rsp), %rsp
270
271 /*
272 * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
273 * on the process stack which is not mapped to userspace and
274 * not readable after we SWITCH_TO_USER_CR3. Delay the CR3
275 * switch until after after the last reference to the process
276 * stack.
277 *
278 * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
279 */
280 SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
281
259 xorq %r8, %r8 282 xorq %r8, %r8
260 xorq %r9, %r9 283 xorq %r9, %r9
261 xorq %r10, %r10 284 xorq %r10, %r10
262 movq RSP-ORIG_RAX(%rsp), %rsp
263 swapgs 285 swapgs
264 sysretl 286 sysretl
265END(entry_SYSCALL_compat) 287END(entry_SYSCALL_compat)
@@ -306,8 +328,11 @@ ENTRY(entry_INT80_compat)
306 */ 328 */
307 movl %eax, %eax 329 movl %eax, %eax
308 330
309 /* Construct struct pt_regs on stack (iret frame is already on stack) */
310 pushq %rax /* pt_regs->orig_ax */ 331 pushq %rax /* pt_regs->orig_ax */
332
333 /* switch to thread stack expects orig_ax to be pushed */
334 call switch_to_thread_stack
335
311 pushq %rdi /* pt_regs->di */ 336 pushq %rdi /* pt_regs->di */
312 pushq %rsi /* pt_regs->si */ 337 pushq %rsi /* pt_regs->si */
313 pushq %rdx /* pt_regs->dx */ 338 pushq %rdx /* pt_regs->dx */
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index f279ba2643dc..577fa8adb785 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -37,6 +37,7 @@
37#include <asm/unistd.h> 37#include <asm/unistd.h>
38#include <asm/fixmap.h> 38#include <asm/fixmap.h>
39#include <asm/traps.h> 39#include <asm/traps.h>
40#include <asm/paravirt.h>
40 41
41#define CREATE_TRACE_POINTS 42#define CREATE_TRACE_POINTS
42#include "vsyscall_trace.h" 43#include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
138 139
139 WARN_ON_ONCE(address != regs->ip); 140 WARN_ON_ONCE(address != regs->ip);
140 141
142 /* This should be unreachable in NATIVE mode. */
143 if (WARN_ON(vsyscall_mode == NATIVE))
144 return false;
145
141 if (vsyscall_mode == NONE) { 146 if (vsyscall_mode == NONE) {
142 warn_bad_vsyscall(KERN_INFO, regs, 147 warn_bad_vsyscall(KERN_INFO, regs,
143 "vsyscall attempted with vsyscall=none"); 148 "vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
329 return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR; 334 return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
330} 335}
331 336
337/*
338 * The VSYSCALL page is the only user-accessible page in the kernel address
339 * range. Normally, the kernel page tables can have _PAGE_USER clear, but
340 * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
341 * are enabled.
342 *
343 * Some day we may create a "minimal" vsyscall mode in which we emulate
344 * vsyscalls but leave the page not present. If so, we skip calling
345 * this.
346 */
347void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
348{
349 pgd_t *pgd;
350 p4d_t *p4d;
351 pud_t *pud;
352 pmd_t *pmd;
353
354 pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
355 set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
356 p4d = p4d_offset(pgd, VSYSCALL_ADDR);
357#if CONFIG_PGTABLE_LEVELS >= 5
358 p4d->p4d |= _PAGE_USER;
359#endif
360 pud = pud_offset(p4d, VSYSCALL_ADDR);
361 set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
362 pmd = pmd_offset(pud, VSYSCALL_ADDR);
363 set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
364}
365
332void __init map_vsyscall(void) 366void __init map_vsyscall(void)
333{ 367{
334 extern char __vsyscall_page; 368 extern char __vsyscall_page;
335 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); 369 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
336 370
337 if (vsyscall_mode != NONE) 371 if (vsyscall_mode != NONE) {
338 __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, 372 __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
339 vsyscall_mode == NATIVE 373 vsyscall_mode == NATIVE
340 ? PAGE_KERNEL_VSYSCALL 374 ? PAGE_KERNEL_VSYSCALL
341 : PAGE_KERNEL_VVAR); 375 : PAGE_KERNEL_VVAR);
376 set_vsyscall_pgtable_user_bits(swapper_pg_dir);
377 }
342 378
343 BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != 379 BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
344 (unsigned long)VSYSCALL_ADDR); 380 (unsigned long)VSYSCALL_ADDR);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 09c26a4f139c..731153a4681e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = {
3847 3847
3848__init int intel_pmu_init(void) 3848__init int intel_pmu_init(void)
3849{ 3849{
3850 struct attribute **extra_attr = NULL;
3851 struct attribute **to_free = NULL;
3850 union cpuid10_edx edx; 3852 union cpuid10_edx edx;
3851 union cpuid10_eax eax; 3853 union cpuid10_eax eax;
3852 union cpuid10_ebx ebx; 3854 union cpuid10_ebx ebx;
@@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void)
3854 unsigned int unused; 3856 unsigned int unused;
3855 struct extra_reg *er; 3857 struct extra_reg *er;
3856 int version, i; 3858 int version, i;
3857 struct attribute **extra_attr = NULL;
3858 char *name; 3859 char *name;
3859 3860
3860 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 3861 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void)
4294 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 4295 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
4295 hsw_format_attr : nhm_format_attr; 4296 hsw_format_attr : nhm_format_attr;
4296 extra_attr = merge_attr(extra_attr, skl_format_attr); 4297 extra_attr = merge_attr(extra_attr, skl_format_attr);
4298 to_free = extra_attr;
4297 x86_pmu.cpu_events = get_hsw_events_attrs(); 4299 x86_pmu.cpu_events = get_hsw_events_attrs();
4298 intel_pmu_pebs_data_source_skl( 4300 intel_pmu_pebs_data_source_skl(
4299 boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); 4301 boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
@@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void)
4401 pr_cont("full-width counters, "); 4403 pr_cont("full-width counters, ");
4402 } 4404 }
4403 4405
4406 kfree(to_free);
4404 return 0; 4407 return 0;
4405} 4408}
4406 4409
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 3674a4b6f8bd..8f0aace08b87 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -3,16 +3,18 @@
3#include <linux/types.h> 3#include <linux/types.h>
4#include <linux/slab.h> 4#include <linux/slab.h>
5 5
6#include <asm/cpu_entry_area.h>
6#include <asm/perf_event.h> 7#include <asm/perf_event.h>
7#include <asm/insn.h> 8#include <asm/insn.h>
8 9
9#include "../perf_event.h" 10#include "../perf_event.h"
10 11
12/* Waste a full page so it can be mapped into the cpu_entry_area */
13DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
14
11/* The size of a BTS record in bytes: */ 15/* The size of a BTS record in bytes: */
12#define BTS_RECORD_SIZE 24 16#define BTS_RECORD_SIZE 24
13 17
14#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
15#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
16#define PEBS_FIXUP_SIZE PAGE_SIZE 18#define PEBS_FIXUP_SIZE PAGE_SIZE
17 19
18/* 20/*
@@ -279,17 +281,52 @@ void fini_debug_store_on_cpu(int cpu)
279 281
280static DEFINE_PER_CPU(void *, insn_buffer); 282static DEFINE_PER_CPU(void *, insn_buffer);
281 283
282static int alloc_pebs_buffer(int cpu) 284static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
283{ 285{
284 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 286 phys_addr_t pa;
287 size_t msz = 0;
288
289 pa = virt_to_phys(addr);
290 for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
291 cea_set_pte(cea, pa, prot);
292}
293
294static void ds_clear_cea(void *cea, size_t size)
295{
296 size_t msz = 0;
297
298 for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
299 cea_set_pte(cea, 0, PAGE_NONE);
300}
301
302static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
303{
304 unsigned int order = get_order(size);
285 int node = cpu_to_node(cpu); 305 int node = cpu_to_node(cpu);
286 int max; 306 struct page *page;
287 void *buffer, *ibuffer; 307
308 page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
309 return page ? page_address(page) : NULL;
310}
311
312static void dsfree_pages(const void *buffer, size_t size)
313{
314 if (buffer)
315 free_pages((unsigned long)buffer, get_order(size));
316}
317
318static int alloc_pebs_buffer(int cpu)
319{
320 struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
321 struct debug_store *ds = hwev->ds;
322 size_t bsiz = x86_pmu.pebs_buffer_size;
323 int max, node = cpu_to_node(cpu);
324 void *buffer, *ibuffer, *cea;
288 325
289 if (!x86_pmu.pebs) 326 if (!x86_pmu.pebs)
290 return 0; 327 return 0;
291 328
292 buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); 329 buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
293 if (unlikely(!buffer)) 330 if (unlikely(!buffer))
294 return -ENOMEM; 331 return -ENOMEM;
295 332
@@ -300,25 +337,27 @@ static int alloc_pebs_buffer(int cpu)
300 if (x86_pmu.intel_cap.pebs_format < 2) { 337 if (x86_pmu.intel_cap.pebs_format < 2) {
301 ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); 338 ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
302 if (!ibuffer) { 339 if (!ibuffer) {
303 kfree(buffer); 340 dsfree_pages(buffer, bsiz);
304 return -ENOMEM; 341 return -ENOMEM;
305 } 342 }
306 per_cpu(insn_buffer, cpu) = ibuffer; 343 per_cpu(insn_buffer, cpu) = ibuffer;
307 } 344 }
308 345 hwev->ds_pebs_vaddr = buffer;
309 max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size; 346 /* Update the cpu entry area mapping */
310 347 cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
311 ds->pebs_buffer_base = (u64)(unsigned long)buffer; 348 ds->pebs_buffer_base = (unsigned long) cea;
349 ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
312 ds->pebs_index = ds->pebs_buffer_base; 350 ds->pebs_index = ds->pebs_buffer_base;
313 ds->pebs_absolute_maximum = ds->pebs_buffer_base + 351 max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
314 max * x86_pmu.pebs_record_size; 352 ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
315
316 return 0; 353 return 0;
317} 354}
318 355
319static void release_pebs_buffer(int cpu) 356static void release_pebs_buffer(int cpu)
320{ 357{
321 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 358 struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
359 struct debug_store *ds = hwev->ds;
360 void *cea;
322 361
323 if (!ds || !x86_pmu.pebs) 362 if (!ds || !x86_pmu.pebs)
324 return; 363 return;
@@ -326,73 +365,70 @@ static void release_pebs_buffer(int cpu)
326 kfree(per_cpu(insn_buffer, cpu)); 365 kfree(per_cpu(insn_buffer, cpu));
327 per_cpu(insn_buffer, cpu) = NULL; 366 per_cpu(insn_buffer, cpu) = NULL;
328 367
329 kfree((void *)(unsigned long)ds->pebs_buffer_base); 368 /* Clear the fixmap */
369 cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
370 ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
330 ds->pebs_buffer_base = 0; 371 ds->pebs_buffer_base = 0;
372 dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
373 hwev->ds_pebs_vaddr = NULL;
331} 374}
332 375
333static int alloc_bts_buffer(int cpu) 376static int alloc_bts_buffer(int cpu)
334{ 377{
335 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 378 struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
336 int node = cpu_to_node(cpu); 379 struct debug_store *ds = hwev->ds;
337 int max, thresh; 380 void *buffer, *cea;
338 void *buffer; 381 int max;
339 382
340 if (!x86_pmu.bts) 383 if (!x86_pmu.bts)
341 return 0; 384 return 0;
342 385
343 buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); 386 buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
344 if (unlikely(!buffer)) { 387 if (unlikely(!buffer)) {
345 WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); 388 WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
346 return -ENOMEM; 389 return -ENOMEM;
347 } 390 }
348 391 hwev->ds_bts_vaddr = buffer;
349 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; 392 /* Update the fixmap */
350 thresh = max / 16; 393 cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
351 394 ds->bts_buffer_base = (unsigned long) cea;
352 ds->bts_buffer_base = (u64)(unsigned long)buffer; 395 ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
353 ds->bts_index = ds->bts_buffer_base; 396 ds->bts_index = ds->bts_buffer_base;
354 ds->bts_absolute_maximum = ds->bts_buffer_base + 397 max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
355 max * BTS_RECORD_SIZE; 398 ds->bts_absolute_maximum = ds->bts_buffer_base + max;
356 ds->bts_interrupt_threshold = ds->bts_absolute_maximum - 399 ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
357 thresh * BTS_RECORD_SIZE;
358
359 return 0; 400 return 0;
360} 401}
361 402
362static void release_bts_buffer(int cpu) 403static void release_bts_buffer(int cpu)
363{ 404{
364 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 405 struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
406 struct debug_store *ds = hwev->ds;
407 void *cea;
365 408
366 if (!ds || !x86_pmu.bts) 409 if (!ds || !x86_pmu.bts)
367 return; 410 return;
368 411
369 kfree((void *)(unsigned long)ds->bts_buffer_base); 412 /* Clear the fixmap */
413 cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
414 ds_clear_cea(cea, BTS_BUFFER_SIZE);
370 ds->bts_buffer_base = 0; 415 ds->bts_buffer_base = 0;
416 dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
417 hwev->ds_bts_vaddr = NULL;
371} 418}
372 419
373static int alloc_ds_buffer(int cpu) 420static int alloc_ds_buffer(int cpu)
374{ 421{
375 int node = cpu_to_node(cpu); 422 struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
376 struct debug_store *ds;
377
378 ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
379 if (unlikely(!ds))
380 return -ENOMEM;
381 423
424 memset(ds, 0, sizeof(*ds));
382 per_cpu(cpu_hw_events, cpu).ds = ds; 425 per_cpu(cpu_hw_events, cpu).ds = ds;
383
384 return 0; 426 return 0;
385} 427}
386 428
387static void release_ds_buffer(int cpu) 429static void release_ds_buffer(int cpu)
388{ 430{
389 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
390
391 if (!ds)
392 return;
393
394 per_cpu(cpu_hw_events, cpu).ds = NULL; 431 per_cpu(cpu_hw_events, cpu).ds = NULL;
395 kfree(ds);
396} 432}
397 433
398void release_ds_buffers(void) 434void release_ds_buffers(void)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index f7aaadf9331f..8e4ea143ed96 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -14,6 +14,8 @@
14 14
15#include <linux/perf_event.h> 15#include <linux/perf_event.h>
16 16
17#include <asm/intel_ds.h>
18
17/* To enable MSR tracing please use the generic trace points. */ 19/* To enable MSR tracing please use the generic trace points. */
18 20
19/* 21/*
@@ -77,8 +79,6 @@ struct amd_nb {
77 struct event_constraint event_constraints[X86_PMC_IDX_MAX]; 79 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
78}; 80};
79 81
80/* The maximal number of PEBS events: */
81#define MAX_PEBS_EVENTS 8
82#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1) 82#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1)
83 83
84/* 84/*
@@ -95,23 +95,6 @@ struct amd_nb {
95 PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ 95 PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
96 PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) 96 PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
97 97
98/*
99 * A debug store configuration.
100 *
101 * We only support architectures that use 64bit fields.
102 */
103struct debug_store {
104 u64 bts_buffer_base;
105 u64 bts_index;
106 u64 bts_absolute_maximum;
107 u64 bts_interrupt_threshold;
108 u64 pebs_buffer_base;
109 u64 pebs_index;
110 u64 pebs_absolute_maximum;
111 u64 pebs_interrupt_threshold;
112 u64 pebs_event_reset[MAX_PEBS_EVENTS];
113};
114
115#define PEBS_REGS \ 98#define PEBS_REGS \
116 (PERF_REG_X86_AX | \ 99 (PERF_REG_X86_AX | \
117 PERF_REG_X86_BX | \ 100 PERF_REG_X86_BX | \
@@ -216,6 +199,8 @@ struct cpu_hw_events {
216 * Intel DebugStore bits 199 * Intel DebugStore bits
217 */ 200 */
218 struct debug_store *ds; 201 struct debug_store *ds;
202 void *ds_pebs_vaddr;
203 void *ds_bts_vaddr;
219 u64 pebs_enabled; 204 u64 pebs_enabled;
220 int n_pebs; 205 int n_pebs;
221 int n_large_pebs; 206 int n_large_pebs;
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 219faaec51df..386a6900e206 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -136,6 +136,7 @@
136#endif 136#endif
137 137
138#ifndef __ASSEMBLY__ 138#ifndef __ASSEMBLY__
139#ifndef __BPF__
139/* 140/*
140 * This output constraint should be used for any inline asm which has a "call" 141 * This output constraint should be used for any inline asm which has a "call"
141 * instruction. Otherwise the asm may be inserted before the frame pointer 142 * instruction. Otherwise the asm may be inserted before the frame pointer
@@ -145,5 +146,6 @@
145register unsigned long current_stack_pointer asm(_ASM_SP); 146register unsigned long current_stack_pointer asm(_ASM_SP);
146#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) 147#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
147#endif 148#endif
149#endif
148 150
149#endif /* _ASM_X86_ASM_H */ 151#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644
index 000000000000..4a7884b8dca5
--- /dev/null
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -0,0 +1,81 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#ifndef _ASM_X86_CPU_ENTRY_AREA_H
4#define _ASM_X86_CPU_ENTRY_AREA_H
5
6#include <linux/percpu-defs.h>
7#include <asm/processor.h>
8#include <asm/intel_ds.h>
9
10/*
11 * cpu_entry_area is a percpu region that contains things needed by the CPU
12 * and early entry/exit code. Real types aren't used for all fields here
13 * to avoid circular header dependencies.
14 *
15 * Every field is a virtual alias of some other allocated backing store.
16 * There is no direct allocation of a struct cpu_entry_area.
17 */
18struct cpu_entry_area {
19 char gdt[PAGE_SIZE];
20
21 /*
22 * The GDT is just below entry_stack and thus serves (on x86_64) as
23 * a a read-only guard page.
24 */
25 struct entry_stack_page entry_stack_page;
26
27 /*
28 * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
29 * we need task switches to work, and task switches write to the TSS.
30 */
31 struct tss_struct tss;
32
33 char entry_trampoline[PAGE_SIZE];
34
35#ifdef CONFIG_X86_64
36 /*
37 * Exception stacks used for IST entries.
38 *
39 * In the future, this should have a separate slot for each stack
40 * with guard pages between them.
41 */
42 char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
43#endif
44#ifdef CONFIG_CPU_SUP_INTEL
45 /*
46 * Per CPU debug store for Intel performance monitoring. Wastes a
47 * full page at the moment.
48 */
49 struct debug_store cpu_debug_store;
50 /*
51 * The actual PEBS/BTS buffers must be mapped to user space
52 * Reserve enough fixmap PTEs.
53 */
54 struct debug_store_buffers cpu_debug_buffers;
55#endif
56};
57
58#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
59#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
60
61DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
62
63extern void setup_cpu_entry_areas(void);
64extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
65
66#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
67#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
68
69#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
70
71#define CPU_ENTRY_AREA_MAP_SIZE \
72 (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
73
74extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
75
76static inline struct entry_stack *cpu_entry_stack(int cpu)
77{
78 return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
79}
80
81#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bf6a76202a77..ea9a7dde62e5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
135 set_bit(bit, (unsigned long *)cpu_caps_set); \ 135 set_bit(bit, (unsigned long *)cpu_caps_set); \
136} while (0) 136} while (0)
137 137
138#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
139
138#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) 140#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
139/* 141/*
140 * Static testing of CPU features. Used the same as boot_cpu_has(). 142 * Static testing of CPU features. Used the same as boot_cpu_has().
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 800104c8a3ed..07cdd1715705 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -197,11 +197,12 @@
197#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ 197#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
198#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ 198#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
199#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ 199#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
200#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
200 201
201#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ 202#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
202#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ 203#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
203#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ 204#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
204 205#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
205#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ 206#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
206#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ 207#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
207#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ 208#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -340,5 +341,6 @@
340#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ 341#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
341#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ 342#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
342#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ 343#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
344#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
343 345
344#endif /* _ASM_X86_CPUFEATURES_H */ 346#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4011cb03ef08..13c5ee878a47 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -7,6 +7,7 @@
7#include <asm/mmu.h> 7#include <asm/mmu.h>
8#include <asm/fixmap.h> 8#include <asm/fixmap.h>
9#include <asm/irq_vectors.h> 9#include <asm/irq_vectors.h>
10#include <asm/cpu_entry_area.h>
10 11
11#include <linux/smp.h> 12#include <linux/smp.h>
12#include <linux/percpu.h> 13#include <linux/percpu.h>
@@ -20,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
20 21
21 desc->type = (info->read_exec_only ^ 1) << 1; 22 desc->type = (info->read_exec_only ^ 1) << 1;
22 desc->type |= info->contents << 2; 23 desc->type |= info->contents << 2;
24 /* Set the ACCESS bit so it can be mapped RO */
25 desc->type |= 1;
23 26
24 desc->s = 1; 27 desc->s = 1;
25 desc->dpl = 0x3; 28 desc->dpl = 0x3;
@@ -60,17 +63,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
60 return this_cpu_ptr(&gdt_page)->gdt; 63 return this_cpu_ptr(&gdt_page)->gdt;
61} 64}
62 65
63/* Get the fixmap index for a specific processor */
64static inline unsigned int get_cpu_gdt_ro_index(int cpu)
65{
66 return FIX_GDT_REMAP_BEGIN + cpu;
67}
68
69/* Provide the fixmap address of the remapped GDT */ 66/* Provide the fixmap address of the remapped GDT */
70static inline struct desc_struct *get_cpu_gdt_ro(int cpu) 67static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
71{ 68{
72 unsigned int idx = get_cpu_gdt_ro_index(cpu); 69 return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
73 return (struct desc_struct *)__fix_to_virt(idx);
74} 70}
75 71
76/* Provide the current read-only GDT */ 72/* Provide the current read-only GDT */
@@ -185,7 +181,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
185#endif 181#endif
186} 182}
187 183
188static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) 184static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
189{ 185{
190 struct desc_struct *d = get_cpu_gdt_rw(cpu); 186 struct desc_struct *d = get_cpu_gdt_rw(cpu);
191 tss_desc tss; 187 tss_desc tss;
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 14d6d5007314..b027633e7300 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -50,6 +50,12 @@
50# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) 50# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
51#endif 51#endif
52 52
53#ifdef CONFIG_PAGE_TABLE_ISOLATION
54# define DISABLE_PTI 0
55#else
56# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
57#endif
58
53/* 59/*
54 * Make sure to add features to the correct mask 60 * Make sure to add features to the correct mask
55 */ 61 */
@@ -60,7 +66,7 @@
60#define DISABLED_MASK4 (DISABLE_PCID) 66#define DISABLED_MASK4 (DISABLE_PCID)
61#define DISABLED_MASK5 0 67#define DISABLED_MASK5 0
62#define DISABLED_MASK6 0 68#define DISABLED_MASK6 0
63#define DISABLED_MASK7 0 69#define DISABLED_MASK7 (DISABLE_PTI)
64#define DISABLED_MASK8 0 70#define DISABLED_MASK8 0
65#define DISABLED_MASK9 (DISABLE_MPX) 71#define DISABLED_MASK9 (DISABLE_MPX)
66#define DISABLED_MASK10 0 72#define DISABLED_MASK10 0
diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
index 0211029076ea..6777480d8a42 100644
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -2,7 +2,7 @@
2#ifndef _ASM_X86_ESPFIX_H 2#ifndef _ASM_X86_ESPFIX_H
3#define _ASM_X86_ESPFIX_H 3#define _ASM_X86_ESPFIX_H
4 4
5#ifdef CONFIG_X86_64 5#ifdef CONFIG_X86_ESPFIX64
6 6
7#include <asm/percpu.h> 7#include <asm/percpu.h>
8 8
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
11 11
12extern void init_espfix_bsp(void); 12extern void init_espfix_bsp(void);
13extern void init_espfix_ap(int cpu); 13extern void init_espfix_ap(int cpu);
14 14#else
15#endif /* CONFIG_X86_64 */ 15static inline void init_espfix_ap(int cpu) { }
16#endif
16 17
17#endif /* _ASM_X86_ESPFIX_H */ 18#endif /* _ASM_X86_ESPFIX_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index b0c505fe9a95..64c4a30e0d39 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP;
44 PAGE_SIZE) 44 PAGE_SIZE)
45#endif 45#endif
46 46
47
48/* 47/*
49 * Here we define all the compile-time 'special' virtual 48 * Here we define all the compile-time 'special' virtual
50 * addresses. The point is to have a constant address at 49 * addresses. The point is to have a constant address at
@@ -84,7 +83,6 @@ enum fixed_addresses {
84 FIX_IO_APIC_BASE_0, 83 FIX_IO_APIC_BASE_0,
85 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, 84 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
86#endif 85#endif
87 FIX_RO_IDT, /* Virtual mapping for read-only IDT */
88#ifdef CONFIG_X86_32 86#ifdef CONFIG_X86_32
89 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ 87 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
90 FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, 88 FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -100,9 +98,6 @@ enum fixed_addresses {
100#ifdef CONFIG_X86_INTEL_MID 98#ifdef CONFIG_X86_INTEL_MID
101 FIX_LNW_VRTC, 99 FIX_LNW_VRTC,
102#endif 100#endif
103 /* Fixmap entries to remap the GDTs, one per processor. */
104 FIX_GDT_REMAP_BEGIN,
105 FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
106 101
107#ifdef CONFIG_ACPI_APEI_GHES 102#ifdef CONFIG_ACPI_APEI_GHES
108 /* Used for GHES mapping from assorted contexts */ 103 /* Used for GHES mapping from assorted contexts */
@@ -143,7 +138,7 @@ enum fixed_addresses {
143extern void reserve_top_address(unsigned long reserve); 138extern void reserve_top_address(unsigned long reserve);
144 139
145#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) 140#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
146#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) 141#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
147 142
148extern int fixmaps_set; 143extern int fixmaps_set;
149 144
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 1b0a5abcd8ae..96aa6b9884dc 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -20,16 +20,7 @@
20#ifndef _ASM_X86_HYPERVISOR_H 20#ifndef _ASM_X86_HYPERVISOR_H
21#define _ASM_X86_HYPERVISOR_H 21#define _ASM_X86_HYPERVISOR_H
22 22
23#ifdef CONFIG_HYPERVISOR_GUEST 23/* x86 hypervisor types */
24
25#include <asm/kvm_para.h>
26#include <asm/x86_init.h>
27#include <asm/xen/hypervisor.h>
28
29/*
30 * x86 hypervisor information
31 */
32
33enum x86_hypervisor_type { 24enum x86_hypervisor_type {
34 X86_HYPER_NATIVE = 0, 25 X86_HYPER_NATIVE = 0,
35 X86_HYPER_VMWARE, 26 X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
39 X86_HYPER_KVM, 30 X86_HYPER_KVM,
40}; 31};
41 32
33#ifdef CONFIG_HYPERVISOR_GUEST
34
35#include <asm/kvm_para.h>
36#include <asm/x86_init.h>
37#include <asm/xen/hypervisor.h>
38
42struct hypervisor_x86 { 39struct hypervisor_x86 {
43 /* Hypervisor name */ 40 /* Hypervisor name */
44 const char *name; 41 const char *name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
58 55
59extern enum x86_hypervisor_type x86_hyper_type; 56extern enum x86_hypervisor_type x86_hyper_type;
60extern void init_hypervisor_platform(void); 57extern void init_hypervisor_platform(void);
58static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
59{
60 return x86_hyper_type == type;
61}
61#else 62#else
62static inline void init_hypervisor_platform(void) { } 63static inline void init_hypervisor_platform(void) { }
64static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
65{
66 return type == X86_HYPER_NATIVE;
67}
63#endif /* CONFIG_HYPERVISOR_GUEST */ 68#endif /* CONFIG_HYPERVISOR_GUEST */
64#endif /* _ASM_X86_HYPERVISOR_H */ 69#endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
new file mode 100644
index 000000000000..62a9f4966b42
--- /dev/null
+++ b/arch/x86/include/asm/intel_ds.h
@@ -0,0 +1,36 @@
1#ifndef _ASM_INTEL_DS_H
2#define _ASM_INTEL_DS_H
3
4#include <linux/percpu-defs.h>
5
6#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
7#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
8
9/* The maximal number of PEBS events: */
10#define MAX_PEBS_EVENTS 8
11
12/*
13 * A debug store configuration.
14 *
15 * We only support architectures that use 64bit fields.
16 */
17struct debug_store {
18 u64 bts_buffer_base;
19 u64 bts_index;
20 u64 bts_absolute_maximum;
21 u64 bts_interrupt_threshold;
22 u64 pebs_buffer_base;
23 u64 pebs_index;
24 u64 pebs_absolute_maximum;
25 u64 pebs_interrupt_threshold;
26 u64 pebs_event_reset[MAX_PEBS_EVENTS];
27} __aligned(PAGE_SIZE);
28
29DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
30
31struct debug_store_buffers {
32 char bts_buffer[BTS_BUFFER_SIZE];
33 char pebs_buffer[PEBS_BUFFER_SIZE];
34};
35
36#endif
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
new file mode 100644
index 000000000000..989cfa86de85
--- /dev/null
+++ b/arch/x86/include/asm/invpcid.h
@@ -0,0 +1,53 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _ASM_X86_INVPCID
3#define _ASM_X86_INVPCID
4
5static inline void __invpcid(unsigned long pcid, unsigned long addr,
6 unsigned long type)
7{
8 struct { u64 d[2]; } desc = { { pcid, addr } };
9
10 /*
11 * The memory clobber is because the whole point is to invalidate
12 * stale TLB entries and, especially if we're flushing global
13 * mappings, we don't want the compiler to reorder any subsequent
14 * memory accesses before the TLB flush.
15 *
16 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
17 * invpcid (%rcx), %rax in long mode.
18 */
19 asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
20 : : "m" (desc), "a" (type), "c" (&desc) : "memory");
21}
22
23#define INVPCID_TYPE_INDIV_ADDR 0
24#define INVPCID_TYPE_SINGLE_CTXT 1
25#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
26#define INVPCID_TYPE_ALL_NON_GLOBAL 3
27
28/* Flush all mappings for a given pcid and addr, not including globals. */
29static inline void invpcid_flush_one(unsigned long pcid,
30 unsigned long addr)
31{
32 __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
33}
34
35/* Flush all mappings for a given PCID, not including globals. */
36static inline void invpcid_flush_single_context(unsigned long pcid)
37{
38 __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
39}
40
41/* Flush all mappings, including globals, for all PCIDs. */
42static inline void invpcid_flush_all(void)
43{
44 __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
45}
46
47/* Flush all mappings for all PCIDs except globals. */
48static inline void invpcid_flush_all_nonglobals(void)
49{
50 __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
51}
52
53#endif /* _ASM_X86_INVPCID */
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index 139feef467f7..c066ffae222b 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
44extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, 44extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
45 unsigned int nr_irqs); 45 unsigned int nr_irqs);
46extern int mp_irqdomain_activate(struct irq_domain *domain, 46extern int mp_irqdomain_activate(struct irq_domain *domain,
47 struct irq_data *irq_data, bool early); 47 struct irq_data *irq_data, bool reserve);
48extern void mp_irqdomain_deactivate(struct irq_domain *domain, 48extern void mp_irqdomain_deactivate(struct irq_domain *domain,
49 struct irq_data *irq_data); 49 struct irq_data *irq_data);
50extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); 50extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c8ef23f2c28f..89f08955fff7 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
142 swapgs; \ 142 swapgs; \
143 sysretl 143 sysretl
144 144
145#ifdef CONFIG_DEBUG_ENTRY
146#define SAVE_FLAGS(x) pushfq; popq %rax
147#endif
145#else 148#else
146#define INTERRUPT_RETURN iret 149#define INTERRUPT_RETURN iret
147#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit 150#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index f86a8caa561e..395c9631e000 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
26extern int __must_check __die(const char *, struct pt_regs *, long); 26extern int __must_check __die(const char *, struct pt_regs *, long);
27extern void show_stack_regs(struct pt_regs *regs); 27extern void show_stack_regs(struct pt_regs *regs);
28extern void __show_regs(struct pt_regs *regs, int all); 28extern void __show_regs(struct pt_regs *regs, int all);
29extern void show_iret_regs(struct pt_regs *regs);
29extern unsigned long oops_begin(void); 30extern unsigned long oops_begin(void);
30extern void oops_end(unsigned long, struct pt_regs *, int signr); 31extern void oops_end(unsigned long, struct pt_regs *, int signr);
31 32
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 9ea26f167497..5ff3e8af2c20 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,6 +3,7 @@
3#define _ASM_X86_MMU_H 3#define _ASM_X86_MMU_H
4 4
5#include <linux/spinlock.h> 5#include <linux/spinlock.h>
6#include <linux/rwsem.h>
6#include <linux/mutex.h> 7#include <linux/mutex.h>
7#include <linux/atomic.h> 8#include <linux/atomic.h>
8 9
@@ -27,7 +28,8 @@ typedef struct {
27 atomic64_t tlb_gen; 28 atomic64_t tlb_gen;
28 29
29#ifdef CONFIG_MODIFY_LDT_SYSCALL 30#ifdef CONFIG_MODIFY_LDT_SYSCALL
30 struct ldt_struct *ldt; 31 struct rw_semaphore ldt_usr_sem;
32 struct ldt_struct *ldt;
31#endif 33#endif
32 34
33#ifdef CONFIG_X86_64 35#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 6d16d15d09a0..c931b88982a0 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -50,22 +50,53 @@ struct ldt_struct {
50 * call gates. On native, we could merge the ldt_struct and LDT 50 * call gates. On native, we could merge the ldt_struct and LDT
51 * allocations, but it's not worth trying to optimize. 51 * allocations, but it's not worth trying to optimize.
52 */ 52 */
53 struct desc_struct *entries; 53 struct desc_struct *entries;
54 unsigned int nr_entries; 54 unsigned int nr_entries;
55
56 /*
57 * If PTI is in use, then the entries array is not mapped while we're
58 * in user mode. The whole array will be aliased at the addressed
59 * given by ldt_slot_va(slot). We use two slots so that we can allocate
60 * and map, and enable a new LDT without invalidating the mapping
61 * of an older, still-in-use LDT.
62 *
63 * slot will be -1 if this LDT doesn't have an alias mapping.
64 */
65 int slot;
55}; 66};
56 67
68/* This is a multiple of PAGE_SIZE. */
69#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
70
71static inline void *ldt_slot_va(int slot)
72{
73#ifdef CONFIG_X86_64
74 return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
75#else
76 BUG();
77#endif
78}
79
57/* 80/*
58 * Used for LDT copy/destruction. 81 * Used for LDT copy/destruction.
59 */ 82 */
60int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm); 83static inline void init_new_context_ldt(struct mm_struct *mm)
84{
85 mm->context.ldt = NULL;
86 init_rwsem(&mm->context.ldt_usr_sem);
87}
88int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
61void destroy_context_ldt(struct mm_struct *mm); 89void destroy_context_ldt(struct mm_struct *mm);
90void ldt_arch_exit_mmap(struct mm_struct *mm);
62#else /* CONFIG_MODIFY_LDT_SYSCALL */ 91#else /* CONFIG_MODIFY_LDT_SYSCALL */
63static inline int init_new_context_ldt(struct task_struct *tsk, 92static inline void init_new_context_ldt(struct mm_struct *mm) { }
64 struct mm_struct *mm) 93static inline int ldt_dup_context(struct mm_struct *oldmm,
94 struct mm_struct *mm)
65{ 95{
66 return 0; 96 return 0;
67} 97}
68static inline void destroy_context_ldt(struct mm_struct *mm) {} 98static inline void destroy_context_ldt(struct mm_struct *mm) { }
99static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
69#endif 100#endif
70 101
71static inline void load_mm_ldt(struct mm_struct *mm) 102static inline void load_mm_ldt(struct mm_struct *mm)
@@ -90,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
90 * that we can see. 121 * that we can see.
91 */ 122 */
92 123
93 if (unlikely(ldt)) 124 if (unlikely(ldt)) {
94 set_ldt(ldt->entries, ldt->nr_entries); 125 if (static_cpu_has(X86_FEATURE_PTI)) {
95 else 126 if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
127 /*
128 * Whoops -- either the new LDT isn't mapped
129 * (if slot == -1) or is mapped into a bogus
130 * slot (if slot > 1).
131 */
132 clear_LDT();
133 return;
134 }
135
136 /*
137 * If page table isolation is enabled, ldt->entries
138 * will not be mapped in the userspace pagetables.
139 * Tell the CPU to access the LDT through the alias
140 * at ldt_slot_va(ldt->slot).
141 */
142 set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
143 } else {
144 set_ldt(ldt->entries, ldt->nr_entries);
145 }
146 } else {
96 clear_LDT(); 147 clear_LDT();
148 }
97#else 149#else
98 clear_LDT(); 150 clear_LDT();
99#endif 151#endif
@@ -132,18 +184,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
132static inline int init_new_context(struct task_struct *tsk, 184static inline int init_new_context(struct task_struct *tsk,
133 struct mm_struct *mm) 185 struct mm_struct *mm)
134{ 186{
187 mutex_init(&mm->context.lock);
188
135 mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); 189 mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
136 atomic64_set(&mm->context.tlb_gen, 0); 190 atomic64_set(&mm->context.tlb_gen, 0);
137 191
138 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS 192#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
139 if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { 193 if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
140 /* pkey 0 is the default and always allocated */ 194 /* pkey 0 is the default and always allocated */
141 mm->context.pkey_allocation_map = 0x1; 195 mm->context.pkey_allocation_map = 0x1;
142 /* -1 means unallocated or invalid */ 196 /* -1 means unallocated or invalid */
143 mm->context.execute_only_pkey = -1; 197 mm->context.execute_only_pkey = -1;
144 } 198 }
145 #endif 199#endif
146 return init_new_context_ldt(tsk, mm); 200 init_new_context_ldt(mm);
201 return 0;
147} 202}
148static inline void destroy_context(struct mm_struct *mm) 203static inline void destroy_context(struct mm_struct *mm)
149{ 204{
@@ -176,15 +231,16 @@ do { \
176} while (0) 231} while (0)
177#endif 232#endif
178 233
179static inline void arch_dup_mmap(struct mm_struct *oldmm, 234static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
180 struct mm_struct *mm)
181{ 235{
182 paravirt_arch_dup_mmap(oldmm, mm); 236 paravirt_arch_dup_mmap(oldmm, mm);
237 return ldt_dup_context(oldmm, mm);
183} 238}
184 239
185static inline void arch_exit_mmap(struct mm_struct *mm) 240static inline void arch_exit_mmap(struct mm_struct *mm)
186{ 241{
187 paravirt_arch_exit_mmap(mm); 242 paravirt_arch_exit_mmap(mm);
243 ldt_arch_exit_mmap(mm);
188} 244}
189 245
190#ifdef CONFIG_X86_64 246#ifdef CONFIG_X86_64
@@ -282,33 +338,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
282} 338}
283 339
284/* 340/*
285 * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
286 * bits. This serves two purposes. It prevents a nasty situation in
287 * which PCID-unaware code saves CR3, loads some other value (with PCID
288 * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
289 * the saved ASID was nonzero. It also means that any bugs involving
290 * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
291 * deterministically.
292 */
293
294static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
295{
296 if (static_cpu_has(X86_FEATURE_PCID)) {
297 VM_WARN_ON_ONCE(asid > 4094);
298 return __sme_pa(mm->pgd) | (asid + 1);
299 } else {
300 VM_WARN_ON_ONCE(asid != 0);
301 return __sme_pa(mm->pgd);
302 }
303}
304
305static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
306{
307 VM_WARN_ON_ONCE(asid > 4094);
308 return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
309}
310
311/*
312 * This can be used from process context to figure out what the value of 341 * This can be used from process context to figure out what the value of
313 * CR3 is without needing to do a (slow) __read_cr3(). 342 * CR3 is without needing to do a (slow) __read_cr3().
314 * 343 *
@@ -317,7 +346,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
317 */ 346 */
318static inline unsigned long __get_current_cr3_fast(void) 347static inline unsigned long __get_current_cr3_fast(void)
319{ 348{
320 unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm), 349 unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
321 this_cpu_read(cpu_tlbstate.loaded_mm_asid)); 350 this_cpu_read(cpu_tlbstate.loaded_mm_asid));
322 351
323 /* For now, be very restrictive about when this can be called. */ 352 /* For now, be very restrictive about when this can be called. */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 283efcaac8af..892df375b615 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -927,6 +927,15 @@ extern void default_banner(void);
927 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ 927 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
928 CLBR_NONE, \ 928 CLBR_NONE, \
929 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) 929 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
930
931#ifdef CONFIG_DEBUG_ENTRY
932#define SAVE_FLAGS(clobbers) \
933 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
934 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
935 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
936 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
937#endif
938
930#endif /* CONFIG_X86_32 */ 939#endif /* CONFIG_X86_32 */
931 940
932#endif /* __ASSEMBLY__ */ 941#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 4b5e1eafada7..aff42e1da6ee 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
30 */ 30 */
31extern gfp_t __userpte_alloc_gfp; 31extern gfp_t __userpte_alloc_gfp;
32 32
33#ifdef CONFIG_PAGE_TABLE_ISOLATION
34/*
35 * Instead of one PGD, we acquire two PGDs. Being order-1, it is
36 * both 8k in size and 8k-aligned. That lets us just flip bit 12
37 * in a pointer to swap between the two 4k halves.
38 */
39#define PGD_ALLOCATION_ORDER 1
40#else
41#define PGD_ALLOCATION_ORDER 0
42#endif
43
33/* 44/*
34 * Allocate and free page tables. 45 * Allocate and free page tables.
35 */ 46 */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 95e2dfd75521..e42b8943cb1a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD];
28int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); 28int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
29 29
30void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); 30void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
31void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
31void ptdump_walk_pgd_level_checkwx(void); 32void ptdump_walk_pgd_level_checkwx(void);
32 33
33#ifdef CONFIG_DEBUG_WX 34#ifdef CONFIG_DEBUG_WX
@@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
841 842
842static inline int p4d_bad(p4d_t p4d) 843static inline int p4d_bad(p4d_t p4d)
843{ 844{
844 return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; 845 unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
846
847 if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
848 ignore_flags |= _PAGE_NX;
849
850 return (p4d_flags(p4d) & ~ignore_flags) != 0;
845} 851}
846#endif /* CONFIG_PGTABLE_LEVELS > 3 */ 852#endif /* CONFIG_PGTABLE_LEVELS > 3 */
847 853
@@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
875 881
876static inline int pgd_bad(pgd_t pgd) 882static inline int pgd_bad(pgd_t pgd)
877{ 883{
878 return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; 884 unsigned long ignore_flags = _PAGE_USER;
885
886 if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
887 ignore_flags |= _PAGE_NX;
888
889 return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
879} 890}
880 891
881static inline int pgd_none(pgd_t pgd) 892static inline int pgd_none(pgd_t pgd)
@@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd)
904 * pgd_offset() returns a (pgd_t *) 915 * pgd_offset() returns a (pgd_t *)
905 * pgd_index() is used get the offset into the pgd page's array of pgd_t's; 916 * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
906 */ 917 */
907#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) 918#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
919/*
920 * a shortcut to get a pgd_t in a given mm
921 */
922#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
908/* 923/*
909 * a shortcut which implies the use of the kernel's pgd, instead 924 * a shortcut which implies the use of the kernel's pgd, instead
910 * of a process's 925 * of a process's
@@ -1106,7 +1121,14 @@ static inline int pud_write(pud_t pud)
1106 */ 1121 */
1107static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) 1122static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
1108{ 1123{
1109 memcpy(dst, src, count * sizeof(pgd_t)); 1124 memcpy(dst, src, count * sizeof(pgd_t));
1125#ifdef CONFIG_PAGE_TABLE_ISOLATION
1126 if (!static_cpu_has(X86_FEATURE_PTI))
1127 return;
1128 /* Clone the user space pgd as well */
1129 memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
1130 count * sizeof(pgd_t));
1131#endif
1110} 1132}
1111 1133
1112#define PTE_SHIFT ilog2(PTRS_PER_PTE) 1134#define PTE_SHIFT ilog2(PTRS_PER_PTE)
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index f2ca9b28fd68..ce245b0cdfca 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
38#define LAST_PKMAP 1024 38#define LAST_PKMAP 1024
39#endif 39#endif
40 40
41#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ 41/*
42 & PMD_MASK) 42 * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
43 * to avoid include recursion hell
44 */
45#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
46
47#define CPU_ENTRY_AREA_BASE \
48 ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
49
50#define PKMAP_BASE \
51 ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
43 52
44#ifdef CONFIG_HIGHMEM 53#ifdef CONFIG_HIGHMEM
45# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) 54# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
46#else 55#else
47# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) 56# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
48#endif 57#endif
49 58
50#define MODULES_VADDR VMALLOC_START 59#define MODULES_VADDR VMALLOC_START
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index e9f05331e732..81462e9a34f6 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
131#endif 131#endif
132} 132}
133 133
134#ifdef CONFIG_PAGE_TABLE_ISOLATION
135/*
136 * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
137 * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and
138 * the user one is in the last 4k. To switch between them, you
139 * just need to flip the 12th bit in their addresses.
140 */
141#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
142
143/*
144 * This generates better code than the inline assembly in
145 * __set_bit().
146 */
147static inline void *ptr_set_bit(void *ptr, int bit)
148{
149 unsigned long __ptr = (unsigned long)ptr;
150
151 __ptr |= BIT(bit);
152 return (void *)__ptr;
153}
154static inline void *ptr_clear_bit(void *ptr, int bit)
155{
156 unsigned long __ptr = (unsigned long)ptr;
157
158 __ptr &= ~BIT(bit);
159 return (void *)__ptr;
160}
161
162static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
163{
164 return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
165}
166
167static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
168{
169 return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
170}
171
172static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
173{
174 return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
175}
176
177static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
178{
179 return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
180}
181#endif /* CONFIG_PAGE_TABLE_ISOLATION */
182
183/*
184 * Page table pages are page-aligned. The lower half of the top
185 * level is used for userspace and the top half for the kernel.
186 *
187 * Returns true for parts of the PGD that map userspace and
188 * false for the parts that map the kernel.
189 */
190static inline bool pgdp_maps_userspace(void *__ptr)
191{
192 unsigned long ptr = (unsigned long)__ptr;
193
194 return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
195}
196
197#ifdef CONFIG_PAGE_TABLE_ISOLATION
198pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
199
200/*
201 * Take a PGD location (pgdp) and a pgd value that needs to be set there.
202 * Populates the user and returns the resulting PGD that must be set in
203 * the kernel copy of the page tables.
204 */
205static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
206{
207 if (!static_cpu_has(X86_FEATURE_PTI))
208 return pgd;
209 return __pti_set_user_pgd(pgdp, pgd);
210}
211#else
212static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
213{
214 return pgd;
215}
216#endif
217
134static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) 218static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
135{ 219{
220#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
221 p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
222#else
136 *p4dp = p4d; 223 *p4dp = p4d;
224#endif
137} 225}
138 226
139static inline void native_p4d_clear(p4d_t *p4d) 227static inline void native_p4d_clear(p4d_t *p4d)
@@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
147 235
148static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) 236static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
149{ 237{
238#ifdef CONFIG_PAGE_TABLE_ISOLATION
239 *pgdp = pti_set_user_pgd(pgdp, pgd);
240#else
150 *pgdp = pgd; 241 *pgdp = pgd;
242#endif
151} 243}
152 244
153static inline void native_pgd_clear(pgd_t *pgd) 245static inline void native_pgd_clear(pgd_t *pgd)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6d5f45dcd4a1..b97a539bcdee 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -76,32 +76,45 @@ typedef struct { pteval_t pte; } pte_t;
76#define PGDIR_MASK (~(PGDIR_SIZE - 1)) 76#define PGDIR_MASK (~(PGDIR_SIZE - 1))
77 77
78/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ 78/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
79#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) 79#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
80
80#ifdef CONFIG_X86_5LEVEL 81#ifdef CONFIG_X86_5LEVEL
81#define VMALLOC_SIZE_TB _AC(16384, UL) 82# define VMALLOC_SIZE_TB _AC(12800, UL)
82#define __VMALLOC_BASE _AC(0xff92000000000000, UL) 83# define __VMALLOC_BASE _AC(0xffa0000000000000, UL)
83#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) 84# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
85# define LDT_PGD_ENTRY _AC(-112, UL)
86# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
84#else 87#else
85#define VMALLOC_SIZE_TB _AC(32, UL) 88# define VMALLOC_SIZE_TB _AC(32, UL)
86#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) 89# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
87#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) 90# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
91# define LDT_PGD_ENTRY _AC(-4, UL)
92# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
88#endif 93#endif
94
89#ifdef CONFIG_RANDOMIZE_MEMORY 95#ifdef CONFIG_RANDOMIZE_MEMORY
90#define VMALLOC_START vmalloc_base 96# define VMALLOC_START vmalloc_base
91#define VMEMMAP_START vmemmap_base 97# define VMEMMAP_START vmemmap_base
92#else 98#else
93#define VMALLOC_START __VMALLOC_BASE 99# define VMALLOC_START __VMALLOC_BASE
94#define VMEMMAP_START __VMEMMAP_BASE 100# define VMEMMAP_START __VMEMMAP_BASE
95#endif /* CONFIG_RANDOMIZE_MEMORY */ 101#endif /* CONFIG_RANDOMIZE_MEMORY */
96#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) 102
97#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) 103#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
104
105#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
98/* The module sections ends with the start of the fixmap */ 106/* The module sections ends with the start of the fixmap */
99#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) 107#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
100#define MODULES_LEN (MODULES_END - MODULES_VADDR) 108#define MODULES_LEN (MODULES_END - MODULES_VADDR)
101#define ESPFIX_PGD_ENTRY _AC(-2, UL) 109
102#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) 110#define ESPFIX_PGD_ENTRY _AC(-2, UL)
103#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) 111#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
104#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) 112
113#define CPU_ENTRY_AREA_PGD _AC(-3, UL)
114#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
115
116#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
117#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
105 118
106#define EARLY_DYNAMIC_PAGE_TABLES 64 119#define EARLY_DYNAMIC_PAGE_TABLES 64
107 120
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 43212a43ee69..6a60fea90b9d 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -38,6 +38,11 @@
38#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull) 38#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull)
39#define CR3_PCID_MASK 0xFFFull 39#define CR3_PCID_MASK 0xFFFull
40#define CR3_NOFLUSH BIT_ULL(63) 40#define CR3_NOFLUSH BIT_ULL(63)
41
42#ifdef CONFIG_PAGE_TABLE_ISOLATION
43# define X86_CR3_PTI_SWITCH_BIT 11
44#endif
45
41#else 46#else
42/* 47/*
43 * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save 48 * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cc16fa882e3e..d3a67fba200a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
163extern struct cpuinfo_x86 boot_cpu_data; 163extern struct cpuinfo_x86 boot_cpu_data;
164extern struct cpuinfo_x86 new_cpu_data; 164extern struct cpuinfo_x86 new_cpu_data;
165 165
166extern struct tss_struct doublefault_tss; 166extern struct x86_hw_tss doublefault_tss;
167extern __u32 cpu_caps_cleared[NCAPINTS]; 167extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
168extern __u32 cpu_caps_set[NCAPINTS]; 168extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
169 169
170#ifdef CONFIG_SMP 170#ifdef CONFIG_SMP
171DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); 171DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
253 write_cr3(__sme_pa(pgdir)); 253 write_cr3(__sme_pa(pgdir));
254} 254}
255 255
256/*
257 * Note that while the legacy 'TSS' name comes from 'Task State Segment',
258 * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
259 * unrelated to the task-switch mechanism:
260 */
256#ifdef CONFIG_X86_32 261#ifdef CONFIG_X86_32
257/* This is the TSS defined by the hardware. */ 262/* This is the TSS defined by the hardware. */
258struct x86_hw_tss { 263struct x86_hw_tss {
@@ -305,7 +310,13 @@ struct x86_hw_tss {
305struct x86_hw_tss { 310struct x86_hw_tss {
306 u32 reserved1; 311 u32 reserved1;
307 u64 sp0; 312 u64 sp0;
313
314 /*
315 * We store cpu_current_top_of_stack in sp1 so it's always accessible.
316 * Linux does not use ring 1, so sp1 is not otherwise needed.
317 */
308 u64 sp1; 318 u64 sp1;
319
309 u64 sp2; 320 u64 sp2;
310 u64 reserved2; 321 u64 reserved2;
311 u64 ist[7]; 322 u64 ist[7];
@@ -323,12 +334,22 @@ struct x86_hw_tss {
323#define IO_BITMAP_BITS 65536 334#define IO_BITMAP_BITS 65536
324#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) 335#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
325#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) 336#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
326#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) 337#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
327#define INVALID_IO_BITMAP_OFFSET 0x8000 338#define INVALID_IO_BITMAP_OFFSET 0x8000
328 339
340struct entry_stack {
341 unsigned long words[64];
342};
343
344struct entry_stack_page {
345 struct entry_stack stack;
346} __aligned(PAGE_SIZE);
347
329struct tss_struct { 348struct tss_struct {
330 /* 349 /*
331 * The hardware state: 350 * The fixed hardware portion. This must not cross a page boundary
351 * at risk of violating the SDM's advice and potentially triggering
352 * errata.
332 */ 353 */
333 struct x86_hw_tss x86_tss; 354 struct x86_hw_tss x86_tss;
334 355
@@ -339,18 +360,9 @@ struct tss_struct {
339 * be within the limit. 360 * be within the limit.
340 */ 361 */
341 unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; 362 unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
363} __aligned(PAGE_SIZE);
342 364
343#ifdef CONFIG_X86_32 365DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
344 /*
345 * Space for the temporary SYSENTER stack.
346 */
347 unsigned long SYSENTER_stack_canary;
348 unsigned long SYSENTER_stack[64];
349#endif
350
351} ____cacheline_aligned;
352
353DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
354 366
355/* 367/*
356 * sizeof(unsigned long) coming from an extra "long" at the end 368 * sizeof(unsigned long) coming from an extra "long" at the end
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
364 376
365#ifdef CONFIG_X86_32 377#ifdef CONFIG_X86_32
366DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); 378DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
379#else
380/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
381#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
367#endif 382#endif
368 383
369/* 384/*
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
523static inline void 538static inline void
524native_load_sp0(unsigned long sp0) 539native_load_sp0(unsigned long sp0)
525{ 540{
526 this_cpu_write(cpu_tss.x86_tss.sp0, sp0); 541 this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
527} 542}
528 543
529static inline void native_swapgs(void) 544static inline void native_swapgs(void)
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
535 550
536static inline unsigned long current_top_of_stack(void) 551static inline unsigned long current_top_of_stack(void)
537{ 552{
538#ifdef CONFIG_X86_64 553 /*
539 return this_cpu_read_stable(cpu_tss.x86_tss.sp0); 554 * We can't read directly from tss.sp0: sp0 on x86_32 is special in
540#else 555 * and around vm86 mode and sp0 on x86_64 is special because of the
541 /* sp0 on x86_32 is special in and around vm86 mode. */ 556 * entry trampoline.
557 */
542 return this_cpu_read_stable(cpu_current_top_of_stack); 558 return this_cpu_read_stable(cpu_current_top_of_stack);
543#endif
544} 559}
545 560
546static inline bool on_thread_stack(void) 561static inline bool on_thread_stack(void)
@@ -837,13 +852,22 @@ static inline void spin_lock_prefetch(const void *x)
837 852
838#else 853#else
839/* 854/*
840 * User space process size. 47bits minus one guard page. The guard 855 * User space process size. This is the first address outside the user range.
841 * page is necessary on Intel CPUs: if a SYSCALL instruction is at 856 * There are a few constraints that determine this:
842 * the highest possible canonical userspace address, then that 857 *
843 * syscall will enter the kernel with a non-canonical return 858 * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
844 * address, and SYSRET will explode dangerously. We avoid this 859 * address, then that syscall will enter the kernel with a
845 * particular problem by preventing anything from being mapped 860 * non-canonical return address, and SYSRET will explode dangerously.
846 * at the maximum canonical address. 861 * We avoid this particular problem by preventing anything executable
862 * from being mapped at the maximum canonical address.
863 *
864 * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
865 * CPUs malfunction if they execute code from the highest canonical page.
866 * They'll speculate right off the end of the canonical space, and
867 * bad things happen. This is worked around in the same way as the
868 * Intel problem.
869 *
870 * With page table isolation enabled, we map the LDT in ... [stay tuned]
847 */ 871 */
848#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE) 872#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
849 873
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
new file mode 100644
index 000000000000..0b5ef05b2d2d
--- /dev/null
+++ b/arch/x86/include/asm/pti.h
@@ -0,0 +1,14 @@
1// SPDX-License-Identifier: GPL-2.0
2#ifndef _ASM_X86_PTI_H
3#define _ASM_X86_PTI_H
4#ifndef __ASSEMBLY__
5
6#ifdef CONFIG_PAGE_TABLE_ISOLATION
7extern void pti_init(void);
8extern void pti_check_boottime_disable(void);
9#else
10static inline void pti_check_boottime_disable(void) { }
11#endif
12
13#endif /* __ASSEMBLY__ */
14#endif /* _ASM_X86_PTI_H */
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 8da111b3c342..f73706878772 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -16,6 +16,7 @@ enum stack_type {
16 STACK_TYPE_TASK, 16 STACK_TYPE_TASK,
17 STACK_TYPE_IRQ, 17 STACK_TYPE_IRQ,
18 STACK_TYPE_SOFTIRQ, 18 STACK_TYPE_SOFTIRQ,
19 STACK_TYPE_ENTRY,
19 STACK_TYPE_EXCEPTION, 20 STACK_TYPE_EXCEPTION,
20 STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, 21 STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
21}; 22};
@@ -28,6 +29,8 @@ struct stack_info {
28bool in_task_stack(unsigned long *stack, struct task_struct *task, 29bool in_task_stack(unsigned long *stack, struct task_struct *task,
29 struct stack_info *info); 30 struct stack_info *info);
30 31
32bool in_entry_stack(unsigned long *stack, struct stack_info *info);
33
31int get_stack_info(unsigned long *stack, struct task_struct *task, 34int get_stack_info(unsigned long *stack, struct task_struct *task,
32 struct stack_info *info, unsigned long *visit_mask); 35 struct stack_info *info, unsigned long *visit_mask);
33 36
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 982c325dad33..8be6afb58471 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -12,7 +12,13 @@
12 12
13/* image of the saved processor state */ 13/* image of the saved processor state */
14struct saved_context { 14struct saved_context {
15 u16 es, fs, gs, ss; 15 /*
16 * On x86_32, all segment registers, with the possible exception of
17 * gs, are saved at kernel entry in pt_regs.
18 */
19#ifdef CONFIG_X86_32_LAZY_GS
20 u16 gs;
21#endif
16 unsigned long cr0, cr2, cr3, cr4; 22 unsigned long cr0, cr2, cr3, cr4;
17 u64 misc_enable; 23 u64 misc_enable;
18 bool misc_enable_saved; 24 bool misc_enable_saved;
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 7306e911faee..a7af9f53c0cb 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -20,8 +20,20 @@
20 */ 20 */
21struct saved_context { 21struct saved_context {
22 struct pt_regs regs; 22 struct pt_regs regs;
23 u16 ds, es, fs, gs, ss; 23
24 unsigned long gs_base, gs_kernel_base, fs_base; 24 /*
25 * User CS and SS are saved in current_pt_regs(). The rest of the
26 * segment selectors need to be saved and restored here.
27 */
28 u16 ds, es, fs, gs;
29
30 /*
31 * Usermode FSBASE and GSBASE may not match the fs and gs selectors,
32 * so we save them separately. We save the kernelmode GSBASE to
33 * restore percpu access after resume.
34 */
35 unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
36
25 unsigned long cr0, cr2, cr3, cr4, cr8; 37 unsigned long cr0, cr2, cr3, cr4, cr8;
26 u64 misc_enable; 38 u64 misc_enable;
27 bool misc_enable_saved; 39 bool misc_enable_saved;
@@ -30,8 +42,7 @@ struct saved_context {
30 u16 gdt_pad; /* Unused */ 42 u16 gdt_pad; /* Unused */
31 struct desc_ptr gdt_desc; 43 struct desc_ptr gdt_desc;
32 u16 idt_pad; 44 u16 idt_pad;
33 u16 idt_limit; 45 struct desc_ptr idt;
34 unsigned long idt_base;
35 u16 ldt; 46 u16 ldt;
36 u16 tss; 47 u16 tss;
37 unsigned long tr; 48 unsigned long tr;
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8c6bd6863db9..eb5f7999a893 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -16,8 +16,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
16 struct tss_struct *tss); 16 struct tss_struct *tss);
17 17
18/* This runs runs on the previous thread's stack. */ 18/* This runs runs on the previous thread's stack. */
19static inline void prepare_switch_to(struct task_struct *prev, 19static inline void prepare_switch_to(struct task_struct *next)
20 struct task_struct *next)
21{ 20{
22#ifdef CONFIG_VMAP_STACK 21#ifdef CONFIG_VMAP_STACK
23 /* 22 /*
@@ -70,7 +69,7 @@ struct fork_frame {
70 69
71#define switch_to(prev, next, last) \ 70#define switch_to(prev, next, last) \
72do { \ 71do { \
73 prepare_switch_to(prev, next); \ 72 prepare_switch_to(next); \
74 \ 73 \
75 ((last) = __switch_to_asm((prev), (next))); \ 74 ((last) = __switch_to_asm((prev), (next))); \
76} while (0) 75} while (0)
@@ -79,10 +78,10 @@ do { \
79static inline void refresh_sysenter_cs(struct thread_struct *thread) 78static inline void refresh_sysenter_cs(struct thread_struct *thread)
80{ 79{
81 /* Only happens when SEP is enabled, no need to test "SEP"arately: */ 80 /* Only happens when SEP is enabled, no need to test "SEP"arately: */
82 if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs)) 81 if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
83 return; 82 return;
84 83
85 this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs); 84 this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
86 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); 85 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
87} 86}
88#endif 87#endif
@@ -90,10 +89,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
90/* This is used when switching tasks or entering/exiting vm86 mode. */ 89/* This is used when switching tasks or entering/exiting vm86 mode. */
91static inline void update_sp0(struct task_struct *task) 90static inline void update_sp0(struct task_struct *task)
92{ 91{
92 /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
93#ifdef CONFIG_X86_32 93#ifdef CONFIG_X86_32
94 load_sp0(task->thread.sp0); 94 load_sp0(task->thread.sp0);
95#else 95#else
96 load_sp0(task_top_of_stack(task)); 96 if (static_cpu_has(X86_FEATURE_XENPV))
97 load_sp0(task_top_of_stack(task));
97#endif 98#endif
98} 99}
99 100
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 70f425947dc5..00223333821a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
207#else /* !__ASSEMBLY__ */ 207#else /* !__ASSEMBLY__ */
208 208
209#ifdef CONFIG_X86_64 209#ifdef CONFIG_X86_64
210# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) 210# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
211#endif 211#endif
212 212
213#endif 213#endif
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 877b5c1a1b12..4a08dd2ab32a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -9,70 +9,130 @@
9#include <asm/cpufeature.h> 9#include <asm/cpufeature.h>
10#include <asm/special_insns.h> 10#include <asm/special_insns.h>
11#include <asm/smp.h> 11#include <asm/smp.h>
12#include <asm/invpcid.h>
13#include <asm/pti.h>
14#include <asm/processor-flags.h>
12 15
13static inline void __invpcid(unsigned long pcid, unsigned long addr, 16/*
14 unsigned long type) 17 * The x86 feature is called PCID (Process Context IDentifier). It is similar
15{ 18 * to what is traditionally called ASID on the RISC processors.
16 struct { u64 d[2]; } desc = { { pcid, addr } }; 19 *
20 * We don't use the traditional ASID implementation, where each process/mm gets
21 * its own ASID and flush/restart when we run out of ASID space.
22 *
23 * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
24 * that came by on this CPU, allowing cheaper switch_mm between processes on
25 * this CPU.
26 *
27 * We end up with different spaces for different things. To avoid confusion we
28 * use different names for each of them:
29 *
30 * ASID - [0, TLB_NR_DYN_ASIDS-1]
31 * the canonical identifier for an mm
32 *
33 * kPCID - [1, TLB_NR_DYN_ASIDS]
34 * the value we write into the PCID part of CR3; corresponds to the
35 * ASID+1, because PCID 0 is special.
36 *
37 * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
38 * for KPTI each mm has two address spaces and thus needs two
39 * PCID values, but we can still do with a single ASID denomination
40 * for each mm. Corresponds to kPCID + 2048.
41 *
42 */
17 43
18 /* 44/* There are 12 bits of space for ASIDS in CR3 */
19 * The memory clobber is because the whole point is to invalidate 45#define CR3_HW_ASID_BITS 12
20 * stale TLB entries and, especially if we're flushing global
21 * mappings, we don't want the compiler to reorder any subsequent
22 * memory accesses before the TLB flush.
23 *
24 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
25 * invpcid (%rcx), %rax in long mode.
26 */
27 asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
28 : : "m" (desc), "a" (type), "c" (&desc) : "memory");
29}
30 46
31#define INVPCID_TYPE_INDIV_ADDR 0 47/*
32#define INVPCID_TYPE_SINGLE_CTXT 1 48 * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
33#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 49 * user/kernel switches
34#define INVPCID_TYPE_ALL_NON_GLOBAL 3 50 */
51#ifdef CONFIG_PAGE_TABLE_ISOLATION
52# define PTI_CONSUMED_PCID_BITS 1
53#else
54# define PTI_CONSUMED_PCID_BITS 0
55#endif
35 56
36/* Flush all mappings for a given pcid and addr, not including globals. */ 57#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
37static inline void invpcid_flush_one(unsigned long pcid, 58
38 unsigned long addr) 59/*
39{ 60 * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
40 __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); 61 * for them being zero-based. Another -1 is because PCID 0 is reserved for
41} 62 * use by non-PCID-aware users.
63 */
64#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
65
66/*
67 * 6 because 6 should be plenty and struct tlb_state will fit in two cache
68 * lines.
69 */
70#define TLB_NR_DYN_ASIDS 6
42 71
43/* Flush all mappings for a given PCID, not including globals. */ 72/*
44static inline void invpcid_flush_single_context(unsigned long pcid) 73 * Given @asid, compute kPCID
74 */
75static inline u16 kern_pcid(u16 asid)
45{ 76{
46 __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); 77 VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
78
79#ifdef CONFIG_PAGE_TABLE_ISOLATION
80 /*
81 * Make sure that the dynamic ASID space does not confict with the
82 * bit we are using to switch between user and kernel ASIDs.
83 */
84 BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
85
86 /*
87 * The ASID being passed in here should have respected the
88 * MAX_ASID_AVAILABLE and thus never have the switch bit set.
89 */
90 VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
91#endif
92 /*
93 * The dynamically-assigned ASIDs that get passed in are small
94 * (<TLB_NR_DYN_ASIDS). They never have the high switch bit set,
95 * so do not bother to clear it.
96 *
97 * If PCID is on, ASID-aware code paths put the ASID+1 into the
98 * PCID bits. This serves two purposes. It prevents a nasty
99 * situation in which PCID-unaware code saves CR3, loads some other
100 * value (with PCID == 0), and then restores CR3, thus corrupting
101 * the TLB for ASID 0 if the saved ASID was nonzero. It also means
102 * that any bugs involving loading a PCID-enabled CR3 with
103 * CR4.PCIDE off will trigger deterministically.
104 */
105 return asid + 1;
47} 106}
48 107
49/* Flush all mappings, including globals, for all PCIDs. */ 108/*
50static inline void invpcid_flush_all(void) 109 * Given @asid, compute uPCID
110 */
111static inline u16 user_pcid(u16 asid)
51{ 112{
52 __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); 113 u16 ret = kern_pcid(asid);
114#ifdef CONFIG_PAGE_TABLE_ISOLATION
115 ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
116#endif
117 return ret;
53} 118}
54 119
55/* Flush all mappings for all PCIDs except globals. */ 120struct pgd_t;
56static inline void invpcid_flush_all_nonglobals(void) 121static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
57{ 122{
58 __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); 123 if (static_cpu_has(X86_FEATURE_PCID)) {
124 return __sme_pa(pgd) | kern_pcid(asid);
125 } else {
126 VM_WARN_ON_ONCE(asid != 0);
127 return __sme_pa(pgd);
128 }
59} 129}
60 130
61static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) 131static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
62{ 132{
63 u64 new_tlb_gen; 133 VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
64 134 VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
65 /* 135 return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
66 * Bump the generation count. This also serves as a full barrier
67 * that synchronizes with switch_mm(): callers are required to order
68 * their read of mm_cpumask after their writes to the paging
69 * structures.
70 */
71 smp_mb__before_atomic();
72 new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
73 smp_mb__after_atomic();
74
75 return new_tlb_gen;
76} 136}
77 137
78#ifdef CONFIG_PARAVIRT 138#ifdef CONFIG_PARAVIRT
@@ -99,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void)
99 return !static_cpu_has(X86_FEATURE_PCID); 159 return !static_cpu_has(X86_FEATURE_PCID);
100} 160}
101 161
102/*
103 * 6 because 6 should be plenty and struct tlb_state will fit in
104 * two cache lines.
105 */
106#define TLB_NR_DYN_ASIDS 6
107
108struct tlb_context { 162struct tlb_context {
109 u64 ctx_id; 163 u64 ctx_id;
110 u64 tlb_gen; 164 u64 tlb_gen;
@@ -139,6 +193,24 @@ struct tlb_state {
139 bool is_lazy; 193 bool is_lazy;
140 194
141 /* 195 /*
196 * If set we changed the page tables in such a way that we
197 * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
198 * This tells us to go invalidate all the non-loaded ctxs[]
199 * on the next context switch.
200 *
201 * The current ctx was kept up-to-date as it ran and does not
202 * need to be invalidated.
203 */
204 bool invalidate_other;
205
206 /*
207 * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
208 * the corresponding user PCID needs a flush next time we
209 * switch to it; see SWITCH_TO_USER_CR3.
210 */
211 unsigned short user_pcid_flush_mask;
212
213 /*
142 * Access to this CR4 shadow and to H/W CR4 is protected by 214 * Access to this CR4 shadow and to H/W CR4 is protected by
143 * disabling interrupts when modifying either one. 215 * disabling interrupts when modifying either one.
144 */ 216 */
@@ -219,6 +291,14 @@ static inline unsigned long cr4_read_shadow(void)
219} 291}
220 292
221/* 293/*
294 * Mark all other ASIDs as invalid, preserves the current.
295 */
296static inline void invalidate_other_asid(void)
297{
298 this_cpu_write(cpu_tlbstate.invalidate_other, true);
299}
300
301/*
222 * Save some of cr4 feature set we're using (e.g. Pentium 4MB 302 * Save some of cr4 feature set we're using (e.g. Pentium 4MB
223 * enable and PPro Global page enable), so that any CPU's that boot 303 * enable and PPro Global page enable), so that any CPU's that boot
224 * up after us can get the correct flags. This should only be used 304 * up after us can get the correct flags. This should only be used
@@ -237,37 +317,63 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
237 317
238extern void initialize_tlbstate_and_flush(void); 318extern void initialize_tlbstate_and_flush(void);
239 319
240static inline void __native_flush_tlb(void) 320/*
321 * Given an ASID, flush the corresponding user ASID. We can delay this
322 * until the next time we switch to it.
323 *
324 * See SWITCH_TO_USER_CR3.
325 */
326static inline void invalidate_user_asid(u16 asid)
241{ 327{
328 /* There is no user ASID if address space separation is off */
329 if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
330 return;
331
242 /* 332 /*
243 * If current->mm == NULL then we borrow a mm which may change during a 333 * We only have a single ASID if PCID is off and the CR3
244 * task switch and therefore we must not be preempted while we write CR3 334 * write will have flushed it.
245 * back:
246 */ 335 */
247 preempt_disable(); 336 if (!cpu_feature_enabled(X86_FEATURE_PCID))
248 native_write_cr3(__native_read_cr3()); 337 return;
249 preempt_enable(); 338
339 if (!static_cpu_has(X86_FEATURE_PTI))
340 return;
341
342 __set_bit(kern_pcid(asid),
343 (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
250} 344}
251 345
252static inline void __native_flush_tlb_global_irq_disabled(void) 346/*
347 * flush the entire current user mapping
348 */
349static inline void __native_flush_tlb(void)
253{ 350{
254 unsigned long cr4; 351 /*
352 * Preemption or interrupts must be disabled to protect the access
353 * to the per CPU variable and to prevent being preempted between
354 * read_cr3() and write_cr3().
355 */
356 WARN_ON_ONCE(preemptible());
255 357
256 cr4 = this_cpu_read(cpu_tlbstate.cr4); 358 invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
257 /* clear PGE */ 359
258 native_write_cr4(cr4 & ~X86_CR4_PGE); 360 /* If current->mm == NULL then the read_cr3() "borrows" an mm */
259 /* write old PGE again and flush TLBs */ 361 native_write_cr3(__native_read_cr3());
260 native_write_cr4(cr4);
261} 362}
262 363
364/*
365 * flush everything
366 */
263static inline void __native_flush_tlb_global(void) 367static inline void __native_flush_tlb_global(void)
264{ 368{
265 unsigned long flags; 369 unsigned long cr4, flags;
266 370
267 if (static_cpu_has(X86_FEATURE_INVPCID)) { 371 if (static_cpu_has(X86_FEATURE_INVPCID)) {
268 /* 372 /*
269 * Using INVPCID is considerably faster than a pair of writes 373 * Using INVPCID is considerably faster than a pair of writes
270 * to CR4 sandwiched inside an IRQ flag save/restore. 374 * to CR4 sandwiched inside an IRQ flag save/restore.
375 *
376 * Note, this works with CR4.PCIDE=0 or 1.
271 */ 377 */
272 invpcid_flush_all(); 378 invpcid_flush_all();
273 return; 379 return;
@@ -280,36 +386,69 @@ static inline void __native_flush_tlb_global(void)
280 */ 386 */
281 raw_local_irq_save(flags); 387 raw_local_irq_save(flags);
282 388
283 __native_flush_tlb_global_irq_disabled(); 389 cr4 = this_cpu_read(cpu_tlbstate.cr4);
390 /* toggle PGE */
391 native_write_cr4(cr4 ^ X86_CR4_PGE);
392 /* write old PGE again and flush TLBs */
393 native_write_cr4(cr4);
284 394
285 raw_local_irq_restore(flags); 395 raw_local_irq_restore(flags);
286} 396}
287 397
398/*
399 * flush one page in the user mapping
400 */
288static inline void __native_flush_tlb_single(unsigned long addr) 401static inline void __native_flush_tlb_single(unsigned long addr)
289{ 402{
403 u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
404
290 asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); 405 asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
406
407 if (!static_cpu_has(X86_FEATURE_PTI))
408 return;
409
410 /*
411 * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
412 * Just use invalidate_user_asid() in case we are called early.
413 */
414 if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
415 invalidate_user_asid(loaded_mm_asid);
416 else
417 invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
291} 418}
292 419
420/*
421 * flush everything
422 */
293static inline void __flush_tlb_all(void) 423static inline void __flush_tlb_all(void)
294{ 424{
295 if (boot_cpu_has(X86_FEATURE_PGE)) 425 if (boot_cpu_has(X86_FEATURE_PGE)) {
296 __flush_tlb_global(); 426 __flush_tlb_global();
297 else 427 } else {
428 /*
429 * !PGE -> !PCID (setup_pcid()), thus every flush is total.
430 */
298 __flush_tlb(); 431 __flush_tlb();
299 432 }
300 /*
301 * Note: if we somehow had PCID but not PGE, then this wouldn't work --
302 * we'd end up flushing kernel translations for the current ASID but
303 * we might fail to flush kernel translations for other cached ASIDs.
304 *
305 * To avoid this issue, we force PCID off if PGE is off.
306 */
307} 433}
308 434
435/*
436 * flush one page in the kernel mapping
437 */
309static inline void __flush_tlb_one(unsigned long addr) 438static inline void __flush_tlb_one(unsigned long addr)
310{ 439{
311 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); 440 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
312 __flush_tlb_single(addr); 441 __flush_tlb_single(addr);
442
443 if (!static_cpu_has(X86_FEATURE_PTI))
444 return;
445
446 /*
447 * __flush_tlb_single() will have cleared the TLB entry for this ASID,
448 * but since kernel space is replicated across all, we must also
449 * invalidate all others.
450 */
451 invalidate_other_asid();
313} 452}
314 453
315#define TLB_FLUSH_ALL -1UL 454#define TLB_FLUSH_ALL -1UL
@@ -370,6 +509,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
370void native_flush_tlb_others(const struct cpumask *cpumask, 509void native_flush_tlb_others(const struct cpumask *cpumask,
371 const struct flush_tlb_info *info); 510 const struct flush_tlb_info *info);
372 511
512static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
513{
514 /*
515 * Bump the generation count. This also serves as a full barrier
516 * that synchronizes with switch_mm(): callers are required to order
517 * their read of mm_cpumask after their writes to the paging
518 * structures.
519 */
520 return atomic64_inc_return(&mm->context.tlb_gen);
521}
522
373static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, 523static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
374 struct mm_struct *mm) 524 struct mm_struct *mm)
375{ 525{
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 84b9ec0c1bc0..22647a642e98 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed,
283DECLARE_EVENT_CLASS(vector_activate, 283DECLARE_EVENT_CLASS(vector_activate,
284 284
285 TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve, 285 TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve,
286 bool early), 286 bool reserve),
287 287
288 TP_ARGS(irq, is_managed, can_reserve, early), 288 TP_ARGS(irq, is_managed, can_reserve, reserve),
289 289
290 TP_STRUCT__entry( 290 TP_STRUCT__entry(
291 __field( unsigned int, irq ) 291 __field( unsigned int, irq )
292 __field( bool, is_managed ) 292 __field( bool, is_managed )
293 __field( bool, can_reserve ) 293 __field( bool, can_reserve )
294 __field( bool, early ) 294 __field( bool, reserve )
295 ), 295 ),
296 296
297 TP_fast_assign( 297 TP_fast_assign(
298 __entry->irq = irq; 298 __entry->irq = irq;
299 __entry->is_managed = is_managed; 299 __entry->is_managed = is_managed;
300 __entry->can_reserve = can_reserve; 300 __entry->can_reserve = can_reserve;
301 __entry->early = early; 301 __entry->reserve = reserve;
302 ), 302 ),
303 303
304 TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d", 304 TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d",
305 __entry->irq, __entry->is_managed, __entry->can_reserve, 305 __entry->irq, __entry->is_managed, __entry->can_reserve,
306 __entry->early) 306 __entry->reserve)
307); 307);
308 308
309#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \ 309#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \
310DEFINE_EVENT_FN(vector_activate, name, \ 310DEFINE_EVENT_FN(vector_activate, name, \
311 TP_PROTO(unsigned int irq, bool is_managed, \ 311 TP_PROTO(unsigned int irq, bool is_managed, \
312 bool can_reserve, bool early), \ 312 bool can_reserve, bool reserve), \
313 TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \ 313 TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL); \
314 314
315DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate); 315DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate);
316DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate); 316DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 1fadd310ff68..31051f35cbb7 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
75dotraplinkage void do_stack_segment(struct pt_regs *, long); 75dotraplinkage void do_stack_segment(struct pt_regs *, long);
76#ifdef CONFIG_X86_64 76#ifdef CONFIG_X86_64
77dotraplinkage void do_double_fault(struct pt_regs *, long); 77dotraplinkage void do_double_fault(struct pt_regs *, long);
78asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
79#endif 78#endif
80dotraplinkage void do_general_protection(struct pt_regs *, long); 79dotraplinkage void do_general_protection(struct pt_regs *, long);
81dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); 80dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index e9cc6fe1fc6f..c1688c2d0a12 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -7,6 +7,9 @@
7#include <asm/ptrace.h> 7#include <asm/ptrace.h>
8#include <asm/stacktrace.h> 8#include <asm/stacktrace.h>
9 9
10#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
11#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
12
10struct unwind_state { 13struct unwind_state {
11 struct stack_info stack_info; 14 struct stack_info stack_info;
12 unsigned long stack_mask; 15 unsigned long stack_mask;
@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
52} 55}
53 56
54#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) 57#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
58/*
59 * WARNING: The entire pt_regs may not be safe to dereference. In some cases,
60 * only the iret frame registers are accessible. Use with caution!
61 */
55static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) 62static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
56{ 63{
57 if (unwind_done(state)) 64 if (unwind_done(state))
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d9a7c659009c..b986b2ca688a 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -7,6 +7,7 @@
7 7
8#ifdef CONFIG_X86_VSYSCALL_EMULATION 8#ifdef CONFIG_X86_VSYSCALL_EMULATION
9extern void map_vsyscall(void); 9extern void map_vsyscall(void);
10extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
10 11
11/* 12/*
12 * Called on instruction fetch fault in vsyscall page. 13 * Called on instruction fetch fault in vsyscall page.
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 7e1e730396ae..bcba3c643e63 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -78,7 +78,12 @@
78#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT) 78#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
79#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */ 79#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
80#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT) 80#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
81#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */ 81
82#define X86_CR3_PCID_BITS 12
83#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
84
85#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
86#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
82 87
83/* 88/*
84 * Intel CPU features in CR4 89 * Intel CPU features in CR4
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6e272f3ea984..880441f24146 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2626,11 +2626,13 @@ static int __init apic_set_verbosity(char *arg)
2626 apic_verbosity = APIC_DEBUG; 2626 apic_verbosity = APIC_DEBUG;
2627 else if (strcmp("verbose", arg) == 0) 2627 else if (strcmp("verbose", arg) == 0)
2628 apic_verbosity = APIC_VERBOSE; 2628 apic_verbosity = APIC_VERBOSE;
2629#ifdef CONFIG_X86_64
2629 else { 2630 else {
2630 pr_warning("APIC Verbosity level %s not recognised" 2631 pr_warning("APIC Verbosity level %s not recognised"
2631 " use apic=verbose or apic=debug\n", arg); 2632 " use apic=verbose or apic=debug\n", arg);
2632 return -EINVAL; 2633 return -EINVAL;
2633 } 2634 }
2635#endif
2634 2636
2635 return 0; 2637 return 0;
2636} 2638}
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index aa85690e9b64..25a87028cb3f 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -151,7 +151,7 @@ static struct apic apic_flat __ro_after_init = {
151 .apic_id_valid = default_apic_id_valid, 151 .apic_id_valid = default_apic_id_valid,
152 .apic_id_registered = flat_apic_id_registered, 152 .apic_id_registered = flat_apic_id_registered,
153 153
154 .irq_delivery_mode = dest_LowestPrio, 154 .irq_delivery_mode = dest_Fixed,
155 .irq_dest_mode = 1, /* logical */ 155 .irq_dest_mode = 1, /* logical */
156 156
157 .disable_esr = 0, 157 .disable_esr = 0,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 7b659c4480c9..5078b5ce63a7 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -110,7 +110,7 @@ struct apic apic_noop __ro_after_init = {
110 .apic_id_valid = default_apic_id_valid, 110 .apic_id_valid = default_apic_id_valid,
111 .apic_id_registered = noop_apic_id_registered, 111 .apic_id_registered = noop_apic_id_registered,
112 112
113 .irq_delivery_mode = dest_LowestPrio, 113 .irq_delivery_mode = dest_Fixed,
114 /* logical delivery broadcast to all CPUs: */ 114 /* logical delivery broadcast to all CPUs: */
115 .irq_dest_mode = 1, 115 .irq_dest_mode = 1,
116 116
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 201579dc5242..8a7963421460 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
2988} 2988}
2989 2989
2990int mp_irqdomain_activate(struct irq_domain *domain, 2990int mp_irqdomain_activate(struct irq_domain *domain,
2991 struct irq_data *irq_data, bool early) 2991 struct irq_data *irq_data, bool reserve)
2992{ 2992{
2993 unsigned long flags; 2993 unsigned long flags;
2994 2994
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 9b18be764422..ce503c99f5c4 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -39,17 +39,13 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
39 ((apic->irq_dest_mode == 0) ? 39 ((apic->irq_dest_mode == 0) ?
40 MSI_ADDR_DEST_MODE_PHYSICAL : 40 MSI_ADDR_DEST_MODE_PHYSICAL :
41 MSI_ADDR_DEST_MODE_LOGICAL) | 41 MSI_ADDR_DEST_MODE_LOGICAL) |
42 ((apic->irq_delivery_mode != dest_LowestPrio) ? 42 MSI_ADDR_REDIRECTION_CPU |
43 MSI_ADDR_REDIRECTION_CPU :
44 MSI_ADDR_REDIRECTION_LOWPRI) |
45 MSI_ADDR_DEST_ID(cfg->dest_apicid); 43 MSI_ADDR_DEST_ID(cfg->dest_apicid);
46 44
47 msg->data = 45 msg->data =
48 MSI_DATA_TRIGGER_EDGE | 46 MSI_DATA_TRIGGER_EDGE |
49 MSI_DATA_LEVEL_ASSERT | 47 MSI_DATA_LEVEL_ASSERT |
50 ((apic->irq_delivery_mode != dest_LowestPrio) ? 48 MSI_DATA_DELIVERY_FIXED |
51 MSI_DATA_DELIVERY_FIXED :
52 MSI_DATA_DELIVERY_LOWPRI) |
53 MSI_DATA_VECTOR(cfg->vector); 49 MSI_DATA_VECTOR(cfg->vector);
54} 50}
55 51
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index fa22017de806..02e8acb134f8 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -105,7 +105,7 @@ static struct apic apic_default __ro_after_init = {
105 .apic_id_valid = default_apic_id_valid, 105 .apic_id_valid = default_apic_id_valid,
106 .apic_id_registered = default_apic_id_registered, 106 .apic_id_registered = default_apic_id_registered,
107 107
108 .irq_delivery_mode = dest_LowestPrio, 108 .irq_delivery_mode = dest_Fixed,
109 /* logical delivery broadcast to all CPUs: */ 109 /* logical delivery broadcast to all CPUs: */
110 .irq_dest_mode = 1, 110 .irq_dest_mode = 1,
111 111
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 750449152b04..f8b03bb8e725 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -184,6 +184,7 @@ static void reserve_irq_vector_locked(struct irq_data *irqd)
184 irq_matrix_reserve(vector_matrix); 184 irq_matrix_reserve(vector_matrix);
185 apicd->can_reserve = true; 185 apicd->can_reserve = true;
186 apicd->has_reserved = true; 186 apicd->has_reserved = true;
187 irqd_set_can_reserve(irqd);
187 trace_vector_reserve(irqd->irq, 0); 188 trace_vector_reserve(irqd->irq, 0);
188 vector_assign_managed_shutdown(irqd); 189 vector_assign_managed_shutdown(irqd);
189} 190}
@@ -368,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd)
368 int ret; 369 int ret;
369 370
370 ret = assign_irq_vector_any_locked(irqd); 371 ret = assign_irq_vector_any_locked(irqd);
371 if (!ret) 372 if (!ret) {
372 apicd->has_reserved = false; 373 apicd->has_reserved = false;
374 /*
375 * Core might have disabled reservation mode after
376 * allocating the irq descriptor. Ideally this should
377 * happen before allocation time, but that would require
378 * completely convoluted ways of transporting that
379 * information.
380 */
381 if (!irqd_can_reserve(irqd))
382 apicd->can_reserve = false;
383 }
373 return ret; 384 return ret;
374} 385}
375 386
@@ -398,21 +409,21 @@ static int activate_managed(struct irq_data *irqd)
398} 409}
399 410
400static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, 411static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
401 bool early) 412 bool reserve)
402{ 413{
403 struct apic_chip_data *apicd = apic_chip_data(irqd); 414 struct apic_chip_data *apicd = apic_chip_data(irqd);
404 unsigned long flags; 415 unsigned long flags;
405 int ret = 0; 416 int ret = 0;
406 417
407 trace_vector_activate(irqd->irq, apicd->is_managed, 418 trace_vector_activate(irqd->irq, apicd->is_managed,
408 apicd->can_reserve, early); 419 apicd->can_reserve, reserve);
409 420
410 /* Nothing to do for fixed assigned vectors */ 421 /* Nothing to do for fixed assigned vectors */
411 if (!apicd->can_reserve && !apicd->is_managed) 422 if (!apicd->can_reserve && !apicd->is_managed)
412 return 0; 423 return 0;
413 424
414 raw_spin_lock_irqsave(&vector_lock, flags); 425 raw_spin_lock_irqsave(&vector_lock, flags);
415 if (early || irqd_is_managed_and_shutdown(irqd)) 426 if (reserve || irqd_is_managed_and_shutdown(irqd))
416 vector_assign_managed_shutdown(irqd); 427 vector_assign_managed_shutdown(irqd);
417 else if (apicd->is_managed) 428 else if (apicd->is_managed)
418 ret = activate_managed(irqd); 429 ret = activate_managed(irqd);
@@ -478,6 +489,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
478 } else { 489 } else {
479 /* Release the vector */ 490 /* Release the vector */
480 apicd->can_reserve = true; 491 apicd->can_reserve = true;
492 irqd_set_can_reserve(irqd);
481 clear_irq_vector(irqd); 493 clear_irq_vector(irqd);
482 realloc = true; 494 realloc = true;
483 } 495 }
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 622f13ca8a94..8b04234e010b 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -184,7 +184,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
184 .apic_id_valid = x2apic_apic_id_valid, 184 .apic_id_valid = x2apic_apic_id_valid,
185 .apic_id_registered = x2apic_apic_id_registered, 185 .apic_id_registered = x2apic_apic_id_registered,
186 186
187 .irq_delivery_mode = dest_LowestPrio, 187 .irq_delivery_mode = dest_Fixed,
188 .irq_dest_mode = 1, /* logical */ 188 .irq_dest_mode = 1, /* logical */
189 189
190 .disable_esr = 0, 190 .disable_esr = 0,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 8ea78275480d..76417a9aab73 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -17,6 +17,7 @@
17#include <asm/sigframe.h> 17#include <asm/sigframe.h>
18#include <asm/bootparam.h> 18#include <asm/bootparam.h>
19#include <asm/suspend.h> 19#include <asm/suspend.h>
20#include <asm/tlbflush.h>
20 21
21#ifdef CONFIG_XEN 22#ifdef CONFIG_XEN
22#include <xen/interface/xen.h> 23#include <xen/interface/xen.h>
@@ -93,4 +94,13 @@ void common(void) {
93 94
94 BLANK(); 95 BLANK();
95 DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); 96 DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
97
98 /* TLB state for the entry code */
99 OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
100
101 /* Layout info for cpu_entry_area */
102 OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
103 OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
104 OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
105 DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
96} 106}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index dedf428b20b6..fa1261eefa16 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -47,13 +47,8 @@ void foo(void)
47 BLANK(); 47 BLANK();
48 48
49 /* Offset from the sysenter stack to tss.sp0 */ 49 /* Offset from the sysenter stack to tss.sp0 */
50 DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - 50 DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
51 offsetofend(struct tss_struct, SYSENTER_stack)); 51 offsetofend(struct cpu_entry_area, entry_stack_page.stack));
52
53 /* Offset from cpu_tss to SYSENTER_stack */
54 OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
55 /* Size of SYSENTER_stack */
56 DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
57 52
58#ifdef CONFIG_CC_STACKPROTECTOR 53#ifdef CONFIG_CC_STACKPROTECTOR
59 BLANK(); 54 BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 630212fa9b9d..bf51e51d808d 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -23,6 +23,9 @@ int main(void)
23#ifdef CONFIG_PARAVIRT 23#ifdef CONFIG_PARAVIRT
24 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); 24 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
25 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); 25 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
26#ifdef CONFIG_DEBUG_ENTRY
27 OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
28#endif
26 BLANK(); 29 BLANK();
27#endif 30#endif
28 31
@@ -63,6 +66,7 @@ int main(void)
63 66
64 OFFSET(TSS_ist, tss_struct, x86_tss.ist); 67 OFFSET(TSS_ist, tss_struct, x86_tss.ist);
65 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); 68 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
69 OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
66 BLANK(); 70 BLANK();
67 71
68#ifdef CONFIG_CC_STACKPROTECTOR 72#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fa998ca8aa5a..c47de4ebf63a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
476 return NULL; /* Not found */ 476 return NULL; /* Not found */
477} 477}
478 478
479__u32 cpu_caps_cleared[NCAPINTS]; 479__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
480__u32 cpu_caps_set[NCAPINTS]; 480__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
481 481
482void load_percpu_segment(int cpu) 482void load_percpu_segment(int cpu)
483{ 483{
@@ -490,28 +490,23 @@ void load_percpu_segment(int cpu)
490 load_stack_canary_segment(); 490 load_stack_canary_segment();
491} 491}
492 492
493/* Setup the fixmap mapping only once per-processor */ 493#ifdef CONFIG_X86_32
494static inline void setup_fixmap_gdt(int cpu) 494/* The 32-bit entry code needs to find cpu_entry_area. */
495{ 495DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
496#ifdef CONFIG_X86_64
497 /* On 64-bit systems, we use a read-only fixmap GDT. */
498 pgprot_t prot = PAGE_KERNEL_RO;
499#else
500 /*
501 * On native 32-bit systems, the GDT cannot be read-only because
502 * our double fault handler uses a task gate, and entering through
503 * a task gate needs to change an available TSS to busy. If the GDT
504 * is read-only, that will triple fault.
505 *
506 * On Xen PV, the GDT must be read-only because the hypervisor requires
507 * it.
508 */
509 pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
510 PAGE_KERNEL_RO : PAGE_KERNEL;
511#endif 496#endif
512 497
513 __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot); 498#ifdef CONFIG_X86_64
514} 499/*
500 * Special IST stacks which the CPU switches to when it calls
501 * an IST-marked descriptor entry. Up to 7 stacks (hardware
502 * limit), all of them are 4K, except the debug stack which
503 * is 8K.
504 */
505static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
506 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
507 [DEBUG_STACK - 1] = DEBUG_STKSZ
508};
509#endif
515 510
516/* Load the original GDT from the per-cpu structure */ 511/* Load the original GDT from the per-cpu structure */
517void load_direct_gdt(int cpu) 512void load_direct_gdt(int cpu)
@@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
747{ 742{
748 int i; 743 int i;
749 744
750 for (i = 0; i < NCAPINTS; i++) { 745 for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
751 c->x86_capability[i] &= ~cpu_caps_cleared[i]; 746 c->x86_capability[i] &= ~cpu_caps_cleared[i];
752 c->x86_capability[i] |= cpu_caps_set[i]; 747 c->x86_capability[i] |= cpu_caps_set[i];
753 } 748 }
@@ -927,6 +922,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
927 } 922 }
928 923
929 setup_force_cpu_cap(X86_FEATURE_ALWAYS); 924 setup_force_cpu_cap(X86_FEATURE_ALWAYS);
925
926 /* Assume for now that ALL x86 CPUs are insecure */
927 setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
928
930 fpu__init_system(c); 929 fpu__init_system(c);
931 930
932#ifdef CONFIG_X86_32 931#ifdef CONFIG_X86_32
@@ -1250,7 +1249,7 @@ void enable_sep_cpu(void)
1250 return; 1249 return;
1251 1250
1252 cpu = get_cpu(); 1251 cpu = get_cpu();
1253 tss = &per_cpu(cpu_tss, cpu); 1252 tss = &per_cpu(cpu_tss_rw, cpu);
1254 1253
1255 /* 1254 /*
1256 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- 1255 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1258,7 @@ void enable_sep_cpu(void)
1259 1258
1260 tss->x86_tss.ss1 = __KERNEL_CS; 1259 tss->x86_tss.ss1 = __KERNEL_CS;
1261 wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); 1260 wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
1262 1261 wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
1263 wrmsr(MSR_IA32_SYSENTER_ESP,
1264 (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
1265 0);
1266
1267 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); 1262 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
1268 1263
1269 put_cpu(); 1264 put_cpu();
@@ -1357,25 +1352,22 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
1357DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; 1352DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
1358EXPORT_PER_CPU_SYMBOL(__preempt_count); 1353EXPORT_PER_CPU_SYMBOL(__preempt_count);
1359 1354
1360/*
1361 * Special IST stacks which the CPU switches to when it calls
1362 * an IST-marked descriptor entry. Up to 7 stacks (hardware
1363 * limit), all of them are 4K, except the debug stack which
1364 * is 8K.
1365 */
1366static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
1367 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1368 [DEBUG_STACK - 1] = DEBUG_STKSZ
1369};
1370
1371static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
1372 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
1373
1374/* May not be marked __init: used by software suspend */ 1355/* May not be marked __init: used by software suspend */
1375void syscall_init(void) 1356void syscall_init(void)
1376{ 1357{
1358 extern char _entry_trampoline[];
1359 extern char entry_SYSCALL_64_trampoline[];
1360
1361 int cpu = smp_processor_id();
1362 unsigned long SYSCALL64_entry_trampoline =
1363 (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
1364 (entry_SYSCALL_64_trampoline - _entry_trampoline);
1365
1377 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); 1366 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
1378 wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); 1367 if (static_cpu_has(X86_FEATURE_PTI))
1368 wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
1369 else
1370 wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
1379 1371
1380#ifdef CONFIG_IA32_EMULATION 1372#ifdef CONFIG_IA32_EMULATION
1381 wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); 1373 wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1378,7 @@ void syscall_init(void)
1386 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). 1378 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
1387 */ 1379 */
1388 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); 1380 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
1389 wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); 1381 wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
1390 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); 1382 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
1391#else 1383#else
1392 wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); 1384 wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1522,7 @@ void cpu_init(void)
1530 if (cpu) 1522 if (cpu)
1531 load_ucode_ap(); 1523 load_ucode_ap();
1532 1524
1533 t = &per_cpu(cpu_tss, cpu); 1525 t = &per_cpu(cpu_tss_rw, cpu);
1534 oist = &per_cpu(orig_ist, cpu); 1526 oist = &per_cpu(orig_ist, cpu);
1535 1527
1536#ifdef CONFIG_NUMA 1528#ifdef CONFIG_NUMA
@@ -1569,7 +1561,7 @@ void cpu_init(void)
1569 * set up and load the per-CPU TSS 1561 * set up and load the per-CPU TSS
1570 */ 1562 */
1571 if (!oist->ist[0]) { 1563 if (!oist->ist[0]) {
1572 char *estacks = per_cpu(exception_stacks, cpu); 1564 char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
1573 1565
1574 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1566 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1575 estacks += exception_stack_sizes[v]; 1567 estacks += exception_stack_sizes[v];
@@ -1580,7 +1572,7 @@ void cpu_init(void)
1580 } 1572 }
1581 } 1573 }
1582 1574
1583 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1575 t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
1584 1576
1585 /* 1577 /*
1586 * <= is required because the CPU will access up to 1578 * <= is required because the CPU will access up to
@@ -1596,11 +1588,12 @@ void cpu_init(void)
1596 enter_lazy_tlb(&init_mm, me); 1588 enter_lazy_tlb(&init_mm, me);
1597 1589
1598 /* 1590 /*
1599 * Initialize the TSS. Don't bother initializing sp0, as the initial 1591 * Initialize the TSS. sp0 points to the entry trampoline stack
1600 * task never enters user mode. 1592 * regardless of what task is running.
1601 */ 1593 */
1602 set_tss_desc(cpu, t); 1594 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1603 load_TR_desc(); 1595 load_TR_desc();
1596 load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
1604 1597
1605 load_mm_ldt(&init_mm); 1598 load_mm_ldt(&init_mm);
1606 1599
@@ -1612,7 +1605,6 @@ void cpu_init(void)
1612 if (is_uv_system()) 1605 if (is_uv_system())
1613 uv_cpu_init(); 1606 uv_cpu_init();
1614 1607
1615 setup_fixmap_gdt(cpu);
1616 load_fixmap_gdt(cpu); 1608 load_fixmap_gdt(cpu);
1617} 1609}
1618 1610
@@ -1622,7 +1614,7 @@ void cpu_init(void)
1622{ 1614{
1623 int cpu = smp_processor_id(); 1615 int cpu = smp_processor_id();
1624 struct task_struct *curr = current; 1616 struct task_struct *curr = current;
1625 struct tss_struct *t = &per_cpu(cpu_tss, cpu); 1617 struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
1626 1618
1627 wait_for_master_cpu(cpu); 1619 wait_for_master_cpu(cpu);
1628 1620
@@ -1657,12 +1649,12 @@ void cpu_init(void)
1657 * Initialize the TSS. Don't bother initializing sp0, as the initial 1649 * Initialize the TSS. Don't bother initializing sp0, as the initial
1658 * task never enters user mode. 1650 * task never enters user mode.
1659 */ 1651 */
1660 set_tss_desc(cpu, t); 1652 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1661 load_TR_desc(); 1653 load_TR_desc();
1662 1654
1663 load_mm_ldt(&init_mm); 1655 load_mm_ldt(&init_mm);
1664 1656
1665 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1657 t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
1666 1658
1667#ifdef CONFIG_DOUBLEFAULT 1659#ifdef CONFIG_DOUBLEFAULT
1668 /* Set up doublefault TSS pointer in the GDT */ 1660 /* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1666,6 @@ void cpu_init(void)
1674 1666
1675 fpu__init_cpu(); 1667 fpu__init_cpu();
1676 1668
1677 setup_fixmap_gdt(cpu);
1678 load_fixmap_gdt(cpu); 1669 load_fixmap_gdt(cpu);
1679} 1670}
1680#endif 1671#endif
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 7dbcb7adf797..8ccdca6d3f9e 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
565} 565}
566#else 566#else
567 567
568/*
569 * Flush global tlb. We only do this in x86_64 where paging has been enabled
570 * already and PGE should be enabled as well.
571 */
572static inline void flush_tlb_early(void)
573{
574 __native_flush_tlb_global_irq_disabled();
575}
576
577static inline void print_ucode(struct ucode_cpu_info *uci) 568static inline void print_ucode(struct ucode_cpu_info *uci)
578{ 569{
579 struct microcode_intel *mc; 570 struct microcode_intel *mc;
@@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
602 if (rev != mc->hdr.rev) 593 if (rev != mc->hdr.rev)
603 return -1; 594 return -1;
604 595
605#ifdef CONFIG_X86_64
606 /* Flush global tlb. This is precaution. */
607 flush_tlb_early();
608#endif
609 uci->cpu_sig.rev = rev; 596 uci->cpu_sig.rev = rev;
610 597
611 if (early) 598 if (early)
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index 0e662c55ae90..0b8cedb20d6d 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
50 cpu_relax(); 50 cpu_relax();
51} 51}
52 52
53struct tss_struct doublefault_tss __cacheline_aligned = { 53struct x86_hw_tss doublefault_tss __cacheline_aligned = {
54 .x86_tss = { 54 .sp0 = STACK_START,
55 .sp0 = STACK_START, 55 .ss0 = __KERNEL_DS,
56 .ss0 = __KERNEL_DS, 56 .ldt = 0,
57 .ldt = 0, 57 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
58 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, 58
59 59 .ip = (unsigned long) doublefault_fn,
60 .ip = (unsigned long) doublefault_fn, 60 /* 0x2 bit is always set */
61 /* 0x2 bit is always set */ 61 .flags = X86_EFLAGS_SF | 0x2,
62 .flags = X86_EFLAGS_SF | 0x2, 62 .sp = STACK_START,
63 .sp = STACK_START, 63 .es = __USER_DS,
64 .es = __USER_DS, 64 .cs = __KERNEL_CS,
65 .cs = __KERNEL_CS, 65 .ss = __KERNEL_DS,
66 .ss = __KERNEL_DS, 66 .ds = __USER_DS,
67 .ds = __USER_DS, 67 .fs = __KERNEL_PERCPU,
68 .fs = __KERNEL_PERCPU, 68
69 69 .__cr3 = __pa_nodebug(swapper_pg_dir),
70 .__cr3 = __pa_nodebug(swapper_pg_dir),
71 }
72}; 70};
73 71
74/* dummy for do_double_fault() call */ 72/* dummy for do_double_fault() call */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index f13b4c00a5de..5fa110699ed2 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,6 +18,7 @@
18#include <linux/nmi.h> 18#include <linux/nmi.h>
19#include <linux/sysfs.h> 19#include <linux/sysfs.h>
20 20
21#include <asm/cpu_entry_area.h>
21#include <asm/stacktrace.h> 22#include <asm/stacktrace.h>
22#include <asm/unwind.h> 23#include <asm/unwind.h>
23 24
@@ -43,6 +44,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
43 return true; 44 return true;
44} 45}
45 46
47bool in_entry_stack(unsigned long *stack, struct stack_info *info)
48{
49 struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
50
51 void *begin = ss;
52 void *end = ss + 1;
53
54 if ((void *)stack < begin || (void *)stack >= end)
55 return false;
56
57 info->type = STACK_TYPE_ENTRY;
58 info->begin = begin;
59 info->end = end;
60 info->next_sp = NULL;
61
62 return true;
63}
64
46static void printk_stack_address(unsigned long address, int reliable, 65static void printk_stack_address(unsigned long address, int reliable,
47 char *log_lvl) 66 char *log_lvl)
48{ 67{
@@ -50,6 +69,28 @@ static void printk_stack_address(unsigned long address, int reliable,
50 printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); 69 printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
51} 70}
52 71
72void show_iret_regs(struct pt_regs *regs)
73{
74 printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
75 printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
76 regs->sp, regs->flags);
77}
78
79static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
80{
81 if (on_stack(info, regs, sizeof(*regs)))
82 __show_regs(regs, 0);
83 else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
84 IRET_FRAME_SIZE)) {
85 /*
86 * When an interrupt or exception occurs in entry code, the
87 * full pt_regs might not have been saved yet. In that case
88 * just print the iret frame.
89 */
90 show_iret_regs(regs);
91 }
92}
93
53void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 94void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
54 unsigned long *stack, char *log_lvl) 95 unsigned long *stack, char *log_lvl)
55{ 96{
@@ -71,31 +112,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
71 * - task stack 112 * - task stack
72 * - interrupt stack 113 * - interrupt stack
73 * - HW exception stacks (double fault, nmi, debug, mce) 114 * - HW exception stacks (double fault, nmi, debug, mce)
115 * - entry stack
74 * 116 *
75 * x86-32 can have up to three stacks: 117 * x86-32 can have up to four stacks:
76 * - task stack 118 * - task stack
77 * - softirq stack 119 * - softirq stack
78 * - hardirq stack 120 * - hardirq stack
121 * - entry stack
79 */ 122 */
80 for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { 123 for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
81 const char *stack_name; 124 const char *stack_name;
82 125
83 /* 126 if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
84 * If we overflowed the task stack into a guard page, jump back 127 /*
85 * to the bottom of the usable stack. 128 * We weren't on a valid stack. It's possible that
86 */ 129 * we overflowed a valid stack into a guard page.
87 if (task_stack_page(task) - (void *)stack < PAGE_SIZE) 130 * See if the next page up is valid so that we can
88 stack = task_stack_page(task); 131 * generate some kind of backtrace if this happens.
89 132 */
90 if (get_stack_info(stack, task, &stack_info, &visit_mask)) 133 stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
91 break; 134 if (get_stack_info(stack, task, &stack_info, &visit_mask))
135 break;
136 }
92 137
93 stack_name = stack_type_name(stack_info.type); 138 stack_name = stack_type_name(stack_info.type);
94 if (stack_name) 139 if (stack_name)
95 printk("%s <%s>\n", log_lvl, stack_name); 140 printk("%s <%s>\n", log_lvl, stack_name);
96 141
97 if (regs && on_stack(&stack_info, regs, sizeof(*regs))) 142 if (regs)
98 __show_regs(regs, 0); 143 show_regs_safe(&stack_info, regs);
99 144
100 /* 145 /*
101 * Scan the stack, printing any text addresses we find. At the 146 * Scan the stack, printing any text addresses we find. At the
@@ -119,7 +164,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
119 164
120 /* 165 /*
121 * Don't print regs->ip again if it was already printed 166 * Don't print regs->ip again if it was already printed
122 * by __show_regs() below. 167 * by show_regs_safe() below.
123 */ 168 */
124 if (regs && stack == &regs->ip) 169 if (regs && stack == &regs->ip)
125 goto next; 170 goto next;
@@ -155,8 +200,8 @@ next:
155 200
156 /* if the frame has entry regs, print them */ 201 /* if the frame has entry regs, print them */
157 regs = unwind_get_entry_regs(&state); 202 regs = unwind_get_entry_regs(&state);
158 if (regs && on_stack(&stack_info, regs, sizeof(*regs))) 203 if (regs)
159 __show_regs(regs, 0); 204 show_regs_safe(&stack_info, regs);
160 } 205 }
161 206
162 if (stack_name) 207 if (stack_name)
@@ -252,11 +297,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
252 unsigned long sp; 297 unsigned long sp;
253#endif 298#endif
254 printk(KERN_DEFAULT 299 printk(KERN_DEFAULT
255 "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter, 300 "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
256 IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", 301 IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
257 IS_ENABLED(CONFIG_SMP) ? " SMP" : "", 302 IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
258 debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", 303 debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
259 IS_ENABLED(CONFIG_KASAN) ? " KASAN" : ""); 304 IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
305 IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
306 (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
260 307
261 if (notify_die(DIE_OOPS, str, regs, err, 308 if (notify_die(DIE_OOPS, str, regs, err,
262 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) 309 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index daefae83a3aa..04170f63e3a1 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
26 if (type == STACK_TYPE_SOFTIRQ) 26 if (type == STACK_TYPE_SOFTIRQ)
27 return "SOFTIRQ"; 27 return "SOFTIRQ";
28 28
29 if (type == STACK_TYPE_ENTRY)
30 return "ENTRY_TRAMPOLINE";
31
29 return NULL; 32 return NULL;
30} 33}
31 34
@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
93 if (task != current) 96 if (task != current)
94 goto unknown; 97 goto unknown;
95 98
99 if (in_entry_stack(stack, info))
100 goto recursion_check;
101
96 if (in_hardirq_stack(stack, info)) 102 if (in_hardirq_stack(stack, info))
97 goto recursion_check; 103 goto recursion_check;
98 104
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 88ce2ffdb110..563e28d14f2c 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -37,6 +37,15 @@ const char *stack_type_name(enum stack_type type)
37 if (type == STACK_TYPE_IRQ) 37 if (type == STACK_TYPE_IRQ)
38 return "IRQ"; 38 return "IRQ";
39 39
40 if (type == STACK_TYPE_ENTRY) {
41 /*
42 * On 64-bit, we have a generic entry stack that we
43 * use for all the kernel entry points, including
44 * SYSENTER.
45 */
46 return "ENTRY_TRAMPOLINE";
47 }
48
40 if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) 49 if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
41 return exception_stack_names[type - STACK_TYPE_EXCEPTION]; 50 return exception_stack_names[type - STACK_TYPE_EXCEPTION];
42 51
@@ -115,6 +124,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
115 if (in_irq_stack(stack, info)) 124 if (in_irq_stack(stack, info))
116 goto recursion_check; 125 goto recursion_check;
117 126
127 if (in_entry_stack(stack, info))
128 goto recursion_check;
129
118 goto unknown; 130 goto unknown;
119 131
120recursion_check: 132recursion_check:
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 7dca675fe78d..04a625f0fcda 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag)
341 .balign PAGE_SIZE; \ 341 .balign PAGE_SIZE; \
342GLOBAL(name) 342GLOBAL(name)
343 343
344#ifdef CONFIG_PAGE_TABLE_ISOLATION
345/*
346 * Each PGD needs to be 8k long and 8k aligned. We do not
347 * ever go out to userspace with these, so we do not
348 * strictly *need* the second page, but this allows us to
349 * have a single set_pgd() implementation that does not
350 * need to worry about whether it has 4k or 8k to work
351 * with.
352 *
353 * This ensures PGDs are 8k long:
354 */
355#define PTI_USER_PGD_FILL 512
356/* This ensures they are 8k-aligned: */
357#define NEXT_PGD_PAGE(name) \
358 .balign 2 * PAGE_SIZE; \
359GLOBAL(name)
360#else
361#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
362#define PTI_USER_PGD_FILL 0
363#endif
364
344/* Automate the creation of 1 to 1 mapping pmd entries */ 365/* Automate the creation of 1 to 1 mapping pmd entries */
345#define PMDS(START, PERM, COUNT) \ 366#define PMDS(START, PERM, COUNT) \
346 i = 0 ; \ 367 i = 0 ; \
@@ -350,13 +371,14 @@ GLOBAL(name)
350 .endr 371 .endr
351 372
352 __INITDATA 373 __INITDATA
353NEXT_PAGE(early_top_pgt) 374NEXT_PGD_PAGE(early_top_pgt)
354 .fill 511,8,0 375 .fill 511,8,0
355#ifdef CONFIG_X86_5LEVEL 376#ifdef CONFIG_X86_5LEVEL
356 .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC 377 .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
357#else 378#else
358 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC 379 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
359#endif 380#endif
381 .fill PTI_USER_PGD_FILL,8,0
360 382
361NEXT_PAGE(early_dynamic_pgts) 383NEXT_PAGE(early_dynamic_pgts)
362 .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 384 .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts)
364 .data 386 .data
365 387
366#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) 388#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
367NEXT_PAGE(init_top_pgt) 389NEXT_PGD_PAGE(init_top_pgt)
368 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 390 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
369 .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 391 .org init_top_pgt + PGD_PAGE_OFFSET*8, 0
370 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 392 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
371 .org init_top_pgt + PGD_START_KERNEL*8, 0 393 .org init_top_pgt + PGD_START_KERNEL*8, 0
372 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 394 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
373 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC 395 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
396 .fill PTI_USER_PGD_FILL,8,0
374 397
375NEXT_PAGE(level3_ident_pgt) 398NEXT_PAGE(level3_ident_pgt)
376 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 399 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
@@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt)
381 */ 404 */
382 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) 405 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
383#else 406#else
384NEXT_PAGE(init_top_pgt) 407NEXT_PGD_PAGE(init_top_pgt)
385 .fill 512,8,0 408 .fill 512,8,0
409 .fill PTI_USER_PGD_FILL,8,0
386#endif 410#endif
387 411
388#ifdef CONFIG_X86_5LEVEL 412#ifdef CONFIG_X86_5LEVEL
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 3feb648781c4..2f723301eb58 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
67 * because the ->io_bitmap_max value must match the bitmap 67 * because the ->io_bitmap_max value must match the bitmap
68 * contents: 68 * contents:
69 */ 69 */
70 tss = &per_cpu(cpu_tss, get_cpu()); 70 tss = &per_cpu(cpu_tss_rw, get_cpu());
71 71
72 if (turn_on) 72 if (turn_on)
73 bitmap_clear(t->io_bitmap_ptr, from, num); 73 bitmap_clear(t->io_bitmap_ptr, from, num);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 49cfd9fe7589..68e1867cca80 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
219 /* high bit used in ret_from_ code */ 219 /* high bit used in ret_from_ code */
220 unsigned vector = ~regs->orig_ax; 220 unsigned vector = ~regs->orig_ax;
221 221
222 /*
223 * NB: Unlike exception entries, IRQ entries do not reliably
224 * handle context tracking in the low-level entry code. This is
225 * because syscall entries execute briefly with IRQs on before
226 * updating context tracking state, so we can take an IRQ from
227 * kernel mode with CONTEXT_USER. The low-level entry code only
228 * updates the context if we came from user mode, so we won't
229 * switch to CONTEXT_KERNEL. We'll fix that once the syscall
230 * code is cleaned up enough that we can cleanly defer enabling
231 * IRQs.
232 */
233
234 entering_irq(); 222 entering_irq();
235 223
236 /* entering_irq() tells RCU that we're not quiescent. Check it. */ 224 /* entering_irq() tells RCU that we're not quiescent. Check it. */
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 020efbf5786b..d86e344f5b3d 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
57 if (regs->sp >= estack_top && regs->sp <= estack_bottom) 57 if (regs->sp >= estack_top && regs->sp <= estack_bottom)
58 return; 58 return;
59 59
60 WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", 60 WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
61 current->comm, curbase, regs->sp, 61 current->comm, curbase, regs->sp,
62 irq_stack_top, irq_stack_bottom, 62 irq_stack_top, irq_stack_bottom,
63 estack_top, estack_bottom); 63 estack_top, estack_bottom, (void *)regs->ip);
64 64
65 if (sysctl_panic_on_stackoverflow) 65 if (sysctl_panic_on_stackoverflow)
66 panic("low stack detected by irq handler - check messages\n"); 66 panic("low stack detected by irq handler - check messages\n");
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 1c1eae961340..26d713ecad34 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -5,6 +5,11 @@
5 * Copyright (C) 2002 Andi Kleen 5 * Copyright (C) 2002 Andi Kleen
6 * 6 *
7 * This handles calls from both 32bit and 64bit mode. 7 * This handles calls from both 32bit and 64bit mode.
8 *
9 * Lock order:
10 * contex.ldt_usr_sem
11 * mmap_sem
12 * context.lock
8 */ 13 */
9 14
10#include <linux/errno.h> 15#include <linux/errno.h>
@@ -19,6 +24,7 @@
19#include <linux/uaccess.h> 24#include <linux/uaccess.h>
20 25
21#include <asm/ldt.h> 26#include <asm/ldt.h>
27#include <asm/tlb.h>
22#include <asm/desc.h> 28#include <asm/desc.h>
23#include <asm/mmu_context.h> 29#include <asm/mmu_context.h>
24#include <asm/syscalls.h> 30#include <asm/syscalls.h>
@@ -42,17 +48,15 @@ static void refresh_ldt_segments(void)
42#endif 48#endif
43} 49}
44 50
45/* context.lock is held for us, so we don't need any locking. */ 51/* context.lock is held by the task which issued the smp function call */
46static void flush_ldt(void *__mm) 52static void flush_ldt(void *__mm)
47{ 53{
48 struct mm_struct *mm = __mm; 54 struct mm_struct *mm = __mm;
49 mm_context_t *pc;
50 55
51 if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm) 56 if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
52 return; 57 return;
53 58
54 pc = &mm->context; 59 load_mm_ldt(mm);
55 set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
56 60
57 refresh_ldt_segments(); 61 refresh_ldt_segments();
58} 62}
@@ -89,25 +93,143 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
89 return NULL; 93 return NULL;
90 } 94 }
91 95
96 /* The new LDT isn't aliased for PTI yet. */
97 new_ldt->slot = -1;
98
92 new_ldt->nr_entries = num_entries; 99 new_ldt->nr_entries = num_entries;
93 return new_ldt; 100 return new_ldt;
94} 101}
95 102
103/*
104 * If PTI is enabled, this maps the LDT into the kernelmode and
105 * usermode tables for the given mm.
106 *
107 * There is no corresponding unmap function. Even if the LDT is freed, we
108 * leave the PTEs around until the slot is reused or the mm is destroyed.
109 * This is harmless: the LDT is always in ordinary memory, and no one will
110 * access the freed slot.
111 *
112 * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
113 * it useful, and the flush would slow down modify_ldt().
114 */
115static int
116map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
117{
118#ifdef CONFIG_PAGE_TABLE_ISOLATION
119 bool is_vmalloc, had_top_level_entry;
120 unsigned long va;
121 spinlock_t *ptl;
122 pgd_t *pgd;
123 int i;
124
125 if (!static_cpu_has(X86_FEATURE_PTI))
126 return 0;
127
128 /*
129 * Any given ldt_struct should have map_ldt_struct() called at most
130 * once.
131 */
132 WARN_ON(ldt->slot != -1);
133
134 /*
135 * Did we already have the top level entry allocated? We can't
136 * use pgd_none() for this because it doens't do anything on
137 * 4-level page table kernels.
138 */
139 pgd = pgd_offset(mm, LDT_BASE_ADDR);
140 had_top_level_entry = (pgd->pgd != 0);
141
142 is_vmalloc = is_vmalloc_addr(ldt->entries);
143
144 for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
145 unsigned long offset = i << PAGE_SHIFT;
146 const void *src = (char *)ldt->entries + offset;
147 unsigned long pfn;
148 pte_t pte, *ptep;
149
150 va = (unsigned long)ldt_slot_va(slot) + offset;
151 pfn = is_vmalloc ? vmalloc_to_pfn(src) :
152 page_to_pfn(virt_to_page(src));
153 /*
154 * Treat the PTI LDT range as a *userspace* range.
155 * get_locked_pte() will allocate all needed pagetables
156 * and account for them in this mm.
157 */
158 ptep = get_locked_pte(mm, va, &ptl);
159 if (!ptep)
160 return -ENOMEM;
161 /*
162 * Map it RO so the easy to find address is not a primary
163 * target via some kernel interface which misses a
164 * permission check.
165 */
166 pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
167 set_pte_at(mm, va, ptep, pte);
168 pte_unmap_unlock(ptep, ptl);
169 }
170
171 if (mm->context.ldt) {
172 /*
173 * We already had an LDT. The top-level entry should already
174 * have been allocated and synchronized with the usermode
175 * tables.
176 */
177 WARN_ON(!had_top_level_entry);
178 if (static_cpu_has(X86_FEATURE_PTI))
179 WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
180 } else {
181 /*
182 * This is the first time we're mapping an LDT for this process.
183 * Sync the pgd to the usermode tables.
184 */
185 WARN_ON(had_top_level_entry);
186 if (static_cpu_has(X86_FEATURE_PTI)) {
187 WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
188 set_pgd(kernel_to_user_pgdp(pgd), *pgd);
189 }
190 }
191
192 va = (unsigned long)ldt_slot_va(slot);
193 flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
194
195 ldt->slot = slot;
196#endif
197 return 0;
198}
199
200static void free_ldt_pgtables(struct mm_struct *mm)
201{
202#ifdef CONFIG_PAGE_TABLE_ISOLATION
203 struct mmu_gather tlb;
204 unsigned long start = LDT_BASE_ADDR;
205 unsigned long end = start + (1UL << PGDIR_SHIFT);
206
207 if (!static_cpu_has(X86_FEATURE_PTI))
208 return;
209
210 tlb_gather_mmu(&tlb, mm, start, end);
211 free_pgd_range(&tlb, start, end, start, end);
212 tlb_finish_mmu(&tlb, start, end);
213#endif
214}
215
96/* After calling this, the LDT is immutable. */ 216/* After calling this, the LDT is immutable. */
97static void finalize_ldt_struct(struct ldt_struct *ldt) 217static void finalize_ldt_struct(struct ldt_struct *ldt)
98{ 218{
99 paravirt_alloc_ldt(ldt->entries, ldt->nr_entries); 219 paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
100} 220}
101 221
102/* context.lock is held */ 222static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
103static void install_ldt(struct mm_struct *current_mm,
104 struct ldt_struct *ldt)
105{ 223{
224 mutex_lock(&mm->context.lock);
225
106 /* Synchronizes with READ_ONCE in load_mm_ldt. */ 226 /* Synchronizes with READ_ONCE in load_mm_ldt. */
107 smp_store_release(&current_mm->context.ldt, ldt); 227 smp_store_release(&mm->context.ldt, ldt);
108 228
109 /* Activate the LDT for all CPUs using current_mm. */ 229 /* Activate the LDT for all CPUs using currents mm. */
110 on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); 230 on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
231
232 mutex_unlock(&mm->context.lock);
111} 233}
112 234
113static void free_ldt_struct(struct ldt_struct *ldt) 235static void free_ldt_struct(struct ldt_struct *ldt)
@@ -124,27 +246,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
124} 246}
125 247
126/* 248/*
127 * we do not have to muck with descriptors here, that is 249 * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
128 * done in switch_mm() as needed. 250 * the new task is not running, so nothing can be installed.
129 */ 251 */
130int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) 252int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
131{ 253{
132 struct ldt_struct *new_ldt; 254 struct ldt_struct *new_ldt;
133 struct mm_struct *old_mm;
134 int retval = 0; 255 int retval = 0;
135 256
136 mutex_init(&mm->context.lock); 257 if (!old_mm)
137 old_mm = current->mm;
138 if (!old_mm) {
139 mm->context.ldt = NULL;
140 return 0; 258 return 0;
141 }
142 259
143 mutex_lock(&old_mm->context.lock); 260 mutex_lock(&old_mm->context.lock);
144 if (!old_mm->context.ldt) { 261 if (!old_mm->context.ldt)
145 mm->context.ldt = NULL;
146 goto out_unlock; 262 goto out_unlock;
147 }
148 263
149 new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); 264 new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
150 if (!new_ldt) { 265 if (!new_ldt) {
@@ -156,6 +271,12 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
156 new_ldt->nr_entries * LDT_ENTRY_SIZE); 271 new_ldt->nr_entries * LDT_ENTRY_SIZE);
157 finalize_ldt_struct(new_ldt); 272 finalize_ldt_struct(new_ldt);
158 273
274 retval = map_ldt_struct(mm, new_ldt, 0);
275 if (retval) {
276 free_ldt_pgtables(mm);
277 free_ldt_struct(new_ldt);
278 goto out_unlock;
279 }
159 mm->context.ldt = new_ldt; 280 mm->context.ldt = new_ldt;
160 281
161out_unlock: 282out_unlock:
@@ -174,13 +295,18 @@ void destroy_context_ldt(struct mm_struct *mm)
174 mm->context.ldt = NULL; 295 mm->context.ldt = NULL;
175} 296}
176 297
298void ldt_arch_exit_mmap(struct mm_struct *mm)
299{
300 free_ldt_pgtables(mm);
301}
302
177static int read_ldt(void __user *ptr, unsigned long bytecount) 303static int read_ldt(void __user *ptr, unsigned long bytecount)
178{ 304{
179 struct mm_struct *mm = current->mm; 305 struct mm_struct *mm = current->mm;
180 unsigned long entries_size; 306 unsigned long entries_size;
181 int retval; 307 int retval;
182 308
183 mutex_lock(&mm->context.lock); 309 down_read(&mm->context.ldt_usr_sem);
184 310
185 if (!mm->context.ldt) { 311 if (!mm->context.ldt) {
186 retval = 0; 312 retval = 0;
@@ -209,7 +335,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
209 retval = bytecount; 335 retval = bytecount;
210 336
211out_unlock: 337out_unlock:
212 mutex_unlock(&mm->context.lock); 338 up_read(&mm->context.ldt_usr_sem);
213 return retval; 339 return retval;
214} 340}
215 341
@@ -269,7 +395,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
269 ldt.avl = 0; 395 ldt.avl = 0;
270 } 396 }
271 397
272 mutex_lock(&mm->context.lock); 398 if (down_write_killable(&mm->context.ldt_usr_sem))
399 return -EINTR;
273 400
274 old_ldt = mm->context.ldt; 401 old_ldt = mm->context.ldt;
275 old_nr_entries = old_ldt ? old_ldt->nr_entries : 0; 402 old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
@@ -286,12 +413,31 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
286 new_ldt->entries[ldt_info.entry_number] = ldt; 413 new_ldt->entries[ldt_info.entry_number] = ldt;
287 finalize_ldt_struct(new_ldt); 414 finalize_ldt_struct(new_ldt);
288 415
416 /*
417 * If we are using PTI, map the new LDT into the userspace pagetables.
418 * If there is already an LDT, use the other slot so that other CPUs
419 * will continue to use the old LDT until install_ldt() switches
420 * them over to the new LDT.
421 */
422 error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
423 if (error) {
424 /*
425 * This only can fail for the first LDT setup. If an LDT is
426 * already installed then the PTE page is already
427 * populated. Mop up a half populated page table.
428 */
429 if (!WARN_ON_ONCE(old_ldt))
430 free_ldt_pgtables(mm);
431 free_ldt_struct(new_ldt);
432 goto out_unlock;
433 }
434
289 install_ldt(mm, new_ldt); 435 install_ldt(mm, new_ldt);
290 free_ldt_struct(old_ldt); 436 free_ldt_struct(old_ldt);
291 error = 0; 437 error = 0;
292 438
293out_unlock: 439out_unlock:
294 mutex_unlock(&mm->context.lock); 440 up_write(&mm->context.ldt_usr_sem);
295out: 441out:
296 return error; 442 return error;
297} 443}
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 00bc751c861c..edfede768688 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -48,8 +48,6 @@ static void load_segments(void)
48 "\tmovl $"STR(__KERNEL_DS)",%%eax\n" 48 "\tmovl $"STR(__KERNEL_DS)",%%eax\n"
49 "\tmovl %%eax,%%ds\n" 49 "\tmovl %%eax,%%ds\n"
50 "\tmovl %%eax,%%es\n" 50 "\tmovl %%eax,%%es\n"
51 "\tmovl %%eax,%%fs\n"
52 "\tmovl %%eax,%%gs\n"
53 "\tmovl %%eax,%%ss\n" 51 "\tmovl %%eax,%%ss\n"
54 : : : "eax", "memory"); 52 : : : "eax", "memory");
55#undef STR 53#undef STR
@@ -232,8 +230,8 @@ void machine_kexec(struct kimage *image)
232 * The gdt & idt are now invalid. 230 * The gdt & idt are now invalid.
233 * If you want to load them you must set up your own idt & gdt. 231 * If you want to load them you must set up your own idt & gdt.
234 */ 232 */
235 set_gdt(phys_to_virt(0), 0);
236 idt_invalidate(phys_to_virt(0)); 233 idt_invalidate(phys_to_virt(0));
234 set_gdt(phys_to_virt(0), 0);
237 235
238 /* now call it */ 236 /* now call it */
239 image->start = relocate_kernel_ptr((unsigned long)image->head, 237 image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index ac0be8283325..9edadabf04f6 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
10DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); 10DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
11DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); 11DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
12DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); 12DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
13DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
14DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); 13DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
15 14
16DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); 15DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
60 PATCH_SITE(pv_mmu_ops, read_cr2); 59 PATCH_SITE(pv_mmu_ops, read_cr2);
61 PATCH_SITE(pv_mmu_ops, read_cr3); 60 PATCH_SITE(pv_mmu_ops, read_cr3);
62 PATCH_SITE(pv_mmu_ops, write_cr3); 61 PATCH_SITE(pv_mmu_ops, write_cr3);
63 PATCH_SITE(pv_mmu_ops, flush_tlb_single);
64 PATCH_SITE(pv_cpu_ops, wbinvd); 62 PATCH_SITE(pv_cpu_ops, wbinvd);
65#if defined(CONFIG_PARAVIRT_SPINLOCKS) 63#if defined(CONFIG_PARAVIRT_SPINLOCKS)
66 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): 64 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index bb988a24db92..aed9d94bd46f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -47,7 +47,7 @@
47 * section. Since TSS's are completely CPU-local, we want them 47 * section. Since TSS's are completely CPU-local, we want them
48 * on exact cacheline boundaries, to eliminate cacheline ping-pong. 48 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
49 */ 49 */
50__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { 50__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
51 .x86_tss = { 51 .x86_tss = {
52 /* 52 /*
53 * .sp0 is only used when entering ring 0 from a lower 53 * .sp0 is only used when entering ring 0 from a lower
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
56 * Poison it. 56 * Poison it.
57 */ 57 */
58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, 58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
59
60#ifdef CONFIG_X86_64
61 /*
62 * .sp1 is cpu_current_top_of_stack. The init task never
63 * runs user code, but cpu_current_top_of_stack should still
64 * be well defined before the first context switch.
65 */
66 .sp1 = TOP_OF_INIT_STACK,
67#endif
68
59#ifdef CONFIG_X86_32 69#ifdef CONFIG_X86_32
60 .ss0 = __KERNEL_DS, 70 .ss0 = __KERNEL_DS,
61 .ss1 = __KERNEL_CS, 71 .ss1 = __KERNEL_CS,
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
71 */ 81 */
72 .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, 82 .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
73#endif 83#endif
74#ifdef CONFIG_X86_32
75 .SYSENTER_stack_canary = STACK_END_MAGIC,
76#endif
77}; 84};
78EXPORT_PER_CPU_SYMBOL(cpu_tss); 85EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
79 86
80DEFINE_PER_CPU(bool, __tss_limit_invalid); 87DEFINE_PER_CPU(bool, __tss_limit_invalid);
81EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); 88EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
104 struct fpu *fpu = &t->fpu; 111 struct fpu *fpu = &t->fpu;
105 112
106 if (bp) { 113 if (bp) {
107 struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); 114 struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
108 115
109 t->io_bitmap_ptr = NULL; 116 t->io_bitmap_ptr = NULL;
110 clear_thread_flag(TIF_IO_BITMAP); 117 clear_thread_flag(TIF_IO_BITMAP);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 45bf0c5f93e1..5224c6099184 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
234 struct fpu *prev_fpu = &prev->fpu; 234 struct fpu *prev_fpu = &prev->fpu;
235 struct fpu *next_fpu = &next->fpu; 235 struct fpu *next_fpu = &next->fpu;
236 int cpu = smp_processor_id(); 236 int cpu = smp_processor_id();
237 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 237 struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
238 238
239 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ 239 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
240 240
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eeeb34f85c25..c75466232016 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
69 unsigned int fsindex, gsindex; 69 unsigned int fsindex, gsindex;
70 unsigned int ds, cs, es; 70 unsigned int ds, cs, es;
71 71
72 printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip); 72 show_iret_regs(regs);
73 printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss, 73
74 regs->sp, regs->flags);
75 if (regs->orig_ax != -1) 74 if (regs->orig_ax != -1)
76 pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); 75 pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
77 else 76 else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
88 printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", 87 printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
89 regs->r13, regs->r14, regs->r15); 88 regs->r13, regs->r14, regs->r15);
90 89
90 if (!all)
91 return;
92
91 asm("movl %%ds,%0" : "=r" (ds)); 93 asm("movl %%ds,%0" : "=r" (ds));
92 asm("movl %%cs,%0" : "=r" (cs)); 94 asm("movl %%cs,%0" : "=r" (cs));
93 asm("movl %%es,%0" : "=r" (es)); 95 asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
98 rdmsrl(MSR_GS_BASE, gs); 100 rdmsrl(MSR_GS_BASE, gs);
99 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 101 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
100 102
101 if (!all)
102 return;
103
104 cr0 = read_cr0(); 103 cr0 = read_cr0();
105 cr2 = read_cr2(); 104 cr2 = read_cr2();
106 cr3 = __read_cr3(); 105 cr3 = __read_cr3();
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
400 struct fpu *prev_fpu = &prev->fpu; 399 struct fpu *prev_fpu = &prev->fpu;
401 struct fpu *next_fpu = &next->fpu; 400 struct fpu *next_fpu = &next->fpu;
402 int cpu = smp_processor_id(); 401 int cpu = smp_processor_id();
403 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 402 struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
404 403
405 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && 404 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
406 this_cpu_read(irq_count) != -1); 405 this_cpu_read(irq_count) != -1);
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
462 * Switch the PDA and FPU contexts. 461 * Switch the PDA and FPU contexts.
463 */ 462 */
464 this_cpu_write(current_task, next_p); 463 this_cpu_write(current_task, next_p);
464 this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
465 465
466 /* Reload sp0. */ 466 /* Reload sp0. */
467 update_sp0(next_p); 467 update_sp0(next_p);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 05a97d5fe298..ed556d50d7ed 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -106,7 +106,7 @@ EXPORT_SYMBOL(__max_logical_packages);
106static unsigned int logical_packages __read_mostly; 106static unsigned int logical_packages __read_mostly;
107 107
108/* Maximum number of SMT threads on any online core */ 108/* Maximum number of SMT threads on any online core */
109int __max_smt_threads __read_mostly; 109int __read_mostly __max_smt_threads = 1;
110 110
111/* Flag to indicate if a complete sched domain rebuild is required */ 111/* Flag to indicate if a complete sched domain rebuild is required */
112bool x86_topology_update; 112bool x86_topology_update;
@@ -126,14 +126,10 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
126 spin_lock_irqsave(&rtc_lock, flags); 126 spin_lock_irqsave(&rtc_lock, flags);
127 CMOS_WRITE(0xa, 0xf); 127 CMOS_WRITE(0xa, 0xf);
128 spin_unlock_irqrestore(&rtc_lock, flags); 128 spin_unlock_irqrestore(&rtc_lock, flags);
129 local_flush_tlb();
130 pr_debug("1.\n");
131 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = 129 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
132 start_eip >> 4; 130 start_eip >> 4;
133 pr_debug("2.\n");
134 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 131 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
135 start_eip & 0xf; 132 start_eip & 0xf;
136 pr_debug("3.\n");
137} 133}
138 134
139static inline void smpboot_restore_warm_reset_vector(void) 135static inline void smpboot_restore_warm_reset_vector(void)
@@ -141,11 +137,6 @@ static inline void smpboot_restore_warm_reset_vector(void)
141 unsigned long flags; 137 unsigned long flags;
142 138
143 /* 139 /*
144 * Install writable page 0 entry to set BIOS data area.
145 */
146 local_flush_tlb();
147
148 /*
149 * Paranoid: Set warm reset code and vector here back 140 * Paranoid: Set warm reset code and vector here back
150 * to default values. 141 * to default values.
151 */ 142 */
@@ -932,12 +923,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
932 initial_code = (unsigned long)start_secondary; 923 initial_code = (unsigned long)start_secondary;
933 initial_stack = idle->thread.sp; 924 initial_stack = idle->thread.sp;
934 925
935 /* 926 /* Enable the espfix hack for this CPU */
936 * Enable the espfix hack for this CPU
937 */
938#ifdef CONFIG_X86_ESPFIX64
939 init_espfix_ap(cpu); 927 init_espfix_ap(cpu);
940#endif
941 928
942 /* So we see what's up */ 929 /* So we see what's up */
943 announce_cpu(cpu, apicid); 930 announce_cpu(cpu, apicid);
@@ -1304,7 +1291,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1304 * Today neither Intel nor AMD support heterogenous systems so 1291 * Today neither Intel nor AMD support heterogenous systems so
1305 * extrapolate the boot cpu's data to all packages. 1292 * extrapolate the boot cpu's data to all packages.
1306 */ 1293 */
1307 ncpus = cpu_data(0).booted_cores * smp_num_siblings; 1294 ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
1308 __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); 1295 __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
1309 pr_info("Max logical packages: %u\n", __max_logical_packages); 1296 pr_info("Max logical packages: %u\n", __max_logical_packages);
1310 1297
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 77835bc021c7..20161ef53537 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -164,8 +164,12 @@ int save_stack_trace_tsk_reliable(struct task_struct *tsk,
164{ 164{
165 int ret; 165 int ret;
166 166
167 /*
168 * If the task doesn't have a stack (e.g., a zombie), the stack is
169 * "reliably" empty.
170 */
167 if (!try_get_task_stack(tsk)) 171 if (!try_get_task_stack(tsk))
168 return -EINVAL; 172 return 0;
169 173
170 ret = __save_stack_trace_reliable(trace, tsk); 174 ret = __save_stack_trace_reliable(trace, tsk);
171 175
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 9a9c9b076955..a5b802a12212 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx,
93 cpu = get_cpu(); 93 cpu = get_cpu();
94 94
95 while (n-- > 0) { 95 while (n-- > 0) {
96 if (LDT_empty(info) || LDT_zero(info)) { 96 if (LDT_empty(info) || LDT_zero(info))
97 memset(desc, 0, sizeof(*desc)); 97 memset(desc, 0, sizeof(*desc));
98 } else { 98 else
99 fill_ldt(desc, info); 99 fill_ldt(desc, info);
100
101 /*
102 * Always set the accessed bit so that the CPU
103 * doesn't try to write to the (read-only) GDT.
104 */
105 desc->type |= 1;
106 }
107 ++info; 100 ++info;
108 ++desc; 101 ++desc;
109 } 102 }
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 989514c94a55..446c9ef8cfc3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
51#include <asm/traps.h> 51#include <asm/traps.h>
52#include <asm/desc.h> 52#include <asm/desc.h>
53#include <asm/fpu/internal.h> 53#include <asm/fpu/internal.h>
54#include <asm/cpu_entry_area.h>
54#include <asm/mce.h> 55#include <asm/mce.h>
55#include <asm/fixmap.h> 56#include <asm/fixmap.h>
56#include <asm/mach_traps.h> 57#include <asm/mach_traps.h>
@@ -348,23 +349,42 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
348 349
349 /* 350 /*
350 * If IRET takes a non-IST fault on the espfix64 stack, then we 351 * If IRET takes a non-IST fault on the espfix64 stack, then we
351 * end up promoting it to a doublefault. In that case, modify 352 * end up promoting it to a doublefault. In that case, take
352 * the stack to make it look like we just entered the #GP 353 * advantage of the fact that we're not using the normal (TSS.sp0)
353 * handler from user space, similar to bad_iret. 354 * stack right now. We can write a fake #GP(0) frame at TSS.sp0
355 * and then modify our own IRET frame so that, when we return,
356 * we land directly at the #GP(0) vector with the stack already
357 * set up according to its expectations.
358 *
359 * The net result is that our #GP handler will think that we
360 * entered from usermode with the bad user context.
354 * 361 *
355 * No need for ist_enter here because we don't use RCU. 362 * No need for ist_enter here because we don't use RCU.
356 */ 363 */
357 if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && 364 if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
358 regs->cs == __KERNEL_CS && 365 regs->cs == __KERNEL_CS &&
359 regs->ip == (unsigned long)native_irq_return_iret) 366 regs->ip == (unsigned long)native_irq_return_iret)
360 { 367 {
361 struct pt_regs *normal_regs = task_pt_regs(current); 368 struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
362 369
363 /* Fake a #GP(0) from userspace. */ 370 /*
364 memmove(&normal_regs->ip, (void *)regs->sp, 5*8); 371 * regs->sp points to the failing IRET frame on the
365 normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ 372 * ESPFIX64 stack. Copy it to the entry stack. This fills
373 * in gpregs->ss through gpregs->ip.
374 *
375 */
376 memmove(&gpregs->ip, (void *)regs->sp, 5*8);
377 gpregs->orig_ax = 0; /* Missing (lost) #GP error code */
378
379 /*
380 * Adjust our frame so that we return straight to the #GP
381 * vector with the expected RSP value. This is safe because
382 * we won't enable interupts or schedule before we invoke
383 * general_protection, so nothing will clobber the stack
384 * frame we just set up.
385 */
366 regs->ip = (unsigned long)general_protection; 386 regs->ip = (unsigned long)general_protection;
367 regs->sp = (unsigned long)&normal_regs->orig_ax; 387 regs->sp = (unsigned long)&gpregs->orig_ax;
368 388
369 return; 389 return;
370 } 390 }
@@ -389,7 +409,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
389 * 409 *
390 * Processors update CR2 whenever a page fault is detected. If a 410 * Processors update CR2 whenever a page fault is detected. If a
391 * second page fault occurs while an earlier page fault is being 411 * second page fault occurs while an earlier page fault is being
392 * deliv- ered, the faulting linear address of the second fault will 412 * delivered, the faulting linear address of the second fault will
393 * overwrite the contents of CR2 (replacing the previous 413 * overwrite the contents of CR2 (replacing the previous
394 * address). These updates to CR2 occur even if the page fault 414 * address). These updates to CR2 occur even if the page fault
395 * results in a double fault or occurs during the delivery of a 415 * results in a double fault or occurs during the delivery of a
@@ -605,14 +625,15 @@ NOKPROBE_SYMBOL(do_int3);
605 625
606#ifdef CONFIG_X86_64 626#ifdef CONFIG_X86_64
607/* 627/*
608 * Help handler running on IST stack to switch off the IST stack if the 628 * Help handler running on a per-cpu (IST or entry trampoline) stack
609 * interrupted code was in user mode. The actual stack switch is done in 629 * to switch to the normal thread stack if the interrupted code was in
610 * entry_64.S 630 * user mode. The actual stack switch is done in entry_64.S
611 */ 631 */
612asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) 632asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
613{ 633{
614 struct pt_regs *regs = task_pt_regs(current); 634 struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
615 *regs = *eregs; 635 if (regs != eregs)
636 *regs = *eregs;
616 return regs; 637 return regs;
617} 638}
618NOKPROBE_SYMBOL(sync_regs); 639NOKPROBE_SYMBOL(sync_regs);
@@ -628,13 +649,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
628 /* 649 /*
629 * This is called from entry_64.S early in handling a fault 650 * This is called from entry_64.S early in handling a fault
630 * caused by a bad iret to user mode. To handle the fault 651 * caused by a bad iret to user mode. To handle the fault
631 * correctly, we want move our stack frame to task_pt_regs 652 * correctly, we want to move our stack frame to where it would
632 * and we want to pretend that the exception came from the 653 * be had we entered directly on the entry stack (rather than
633 * iret target. 654 * just below the IRET frame) and we want to pretend that the
655 * exception came from the IRET target.
634 */ 656 */
635 struct bad_iret_stack *new_stack = 657 struct bad_iret_stack *new_stack =
636 container_of(task_pt_regs(current), 658 (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
637 struct bad_iret_stack, regs);
638 659
639 /* Copy the IRET target to the new stack. */ 660 /* Copy the IRET target to the new stack. */
640 memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); 661 memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
@@ -795,14 +816,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
795 debug_stack_usage_dec(); 816 debug_stack_usage_dec();
796 817
797exit: 818exit:
798#if defined(CONFIG_X86_32)
799 /*
800 * This is the most likely code path that involves non-trivial use
801 * of the SYSENTER stack. Check that we haven't overrun it.
802 */
803 WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
804 "Overran or corrupted SYSENTER stack\n");
805#endif
806 ist_exit(regs); 819 ist_exit(regs);
807} 820}
808NOKPROBE_SYMBOL(do_debug); 821NOKPROBE_SYMBOL(do_debug);
@@ -929,6 +942,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
929 942
930void __init trap_init(void) 943void __init trap_init(void)
931{ 944{
945 /* Init cpu_entry_area before IST entries are set up */
946 setup_cpu_entry_areas();
947
932 idt_setup_traps(); 948 idt_setup_traps();
933 949
934 /* 950 /*
@@ -936,8 +952,9 @@ void __init trap_init(void)
936 * "sidt" instruction will not leak the location of the kernel, and 952 * "sidt" instruction will not leak the location of the kernel, and
937 * to defend the IDT against arbitrary memory write vulnerabilities. 953 * to defend the IDT against arbitrary memory write vulnerabilities.
938 * It will be reloaded in cpu_init() */ 954 * It will be reloaded in cpu_init() */
939 __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); 955 cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
940 idt_descr.address = fix_to_virt(FIX_RO_IDT); 956 PAGE_KERNEL_RO);
957 idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
941 958
942 /* 959 /*
943 * Should be a barrier for any external CPU state: 960 * Should be a barrier for any external CPU state:
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index a3f973b2c97a..be86a865087a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
253 return NULL; 253 return NULL;
254} 254}
255 255
256static bool stack_access_ok(struct unwind_state *state, unsigned long addr, 256static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
257 size_t len) 257 size_t len)
258{ 258{
259 struct stack_info *info = &state->stack_info; 259 struct stack_info *info = &state->stack_info;
260 void *addr = (void *)_addr;
260 261
261 /* 262 if (!on_stack(info, addr, len) &&
262 * If the address isn't on the current stack, switch to the next one. 263 (get_stack_info(addr, state->task, info, &state->stack_mask)))
263 * 264 return false;
264 * We may have to traverse multiple stacks to deal with the possibility
265 * that info->next_sp could point to an empty stack and the address
266 * could be on a subsequent stack.
267 */
268 while (!on_stack(info, (void *)addr, len))
269 if (get_stack_info(info->next_sp, state->task, info,
270 &state->stack_mask))
271 return false;
272 265
273 return true; 266 return true;
274} 267}
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
283 return true; 276 return true;
284} 277}
285 278
286#define REGS_SIZE (sizeof(struct pt_regs))
287#define SP_OFFSET (offsetof(struct pt_regs, sp))
288#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
289#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
290
291static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, 279static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
292 unsigned long *ip, unsigned long *sp, bool full) 280 unsigned long *ip, unsigned long *sp)
293{ 281{
294 size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE; 282 struct pt_regs *regs = (struct pt_regs *)addr;
295 size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
296 struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
297
298 if (IS_ENABLED(CONFIG_X86_64)) {
299 if (!stack_access_ok(state, addr, regs_size))
300 return false;
301 283
302 *ip = regs->ip; 284 /* x86-32 support will be more complicated due to the &regs->sp hack */
303 *sp = regs->sp; 285 BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
304 286
305 return true; 287 if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
306 }
307
308 if (!stack_access_ok(state, addr, sp_offset))
309 return false; 288 return false;
310 289
311 *ip = regs->ip; 290 *ip = regs->ip;
291 *sp = regs->sp;
292 return true;
293}
312 294
313 if (user_mode(regs)) { 295static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
314 if (!stack_access_ok(state, addr + sp_offset, 296 unsigned long *ip, unsigned long *sp)
315 REGS_SIZE - SP_OFFSET)) 297{
316 return false; 298 struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
317 299
318 *sp = regs->sp; 300 if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
319 } else 301 return false;
320 *sp = (unsigned long)&regs->sp;
321 302
303 *ip = regs->ip;
304 *sp = regs->sp;
322 return true; 305 return true;
323} 306}
324 307
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
327 unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; 310 unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
328 enum stack_type prev_type = state->stack_info.type; 311 enum stack_type prev_type = state->stack_info.type;
329 struct orc_entry *orc; 312 struct orc_entry *orc;
330 struct pt_regs *ptregs;
331 bool indirect = false; 313 bool indirect = false;
332 314
333 if (unwind_done(state)) 315 if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
435 break; 417 break;
436 418
437 case ORC_TYPE_REGS: 419 case ORC_TYPE_REGS:
438 if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { 420 if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
439 orc_warn("can't dereference registers at %p for ip %pB\n", 421 orc_warn("can't dereference registers at %p for ip %pB\n",
440 (void *)sp, (void *)orig_ip); 422 (void *)sp, (void *)orig_ip);
441 goto done; 423 goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
447 break; 429 break;
448 430
449 case ORC_TYPE_REGS_IRET: 431 case ORC_TYPE_REGS_IRET:
450 if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { 432 if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
451 orc_warn("can't dereference iret registers at %p for ip %pB\n", 433 orc_warn("can't dereference iret registers at %p for ip %pB\n",
452 (void *)sp, (void *)orig_ip); 434 (void *)sp, (void *)orig_ip);
453 goto done; 435 goto done;
454 } 436 }
455 437
456 ptregs = container_of((void *)sp, struct pt_regs, ip); 438 state->regs = (void *)sp - IRET_FRAME_OFFSET;
457 if ((unsigned long)ptregs >= prev_sp && 439 state->full_regs = false;
458 on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
459 state->regs = ptregs;
460 state->full_regs = false;
461 } else
462 state->regs = NULL;
463
464 state->signal = true; 440 state->signal = true;
465 break; 441 break;
466 442
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
553 } 529 }
554 530
555 if (get_stack_info((unsigned long *)state->sp, state->task, 531 if (get_stack_info((unsigned long *)state->sp, state->task,
556 &state->stack_info, &state->stack_mask)) 532 &state->stack_info, &state->stack_mask)) {
557 return; 533 /*
534 * We weren't on a valid stack. It's possible that
535 * we overflowed a valid stack into a guard page.
536 * See if the next page up is valid so that we can
537 * generate some kind of backtrace if this happens.
538 */
539 void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
540 if (get_stack_info(next_page, state->task, &state->stack_info,
541 &state->stack_mask))
542 return;
543 }
558 544
559 /* 545 /*
560 * The caller can provide the address of the first frame directly 546 * The caller can provide the address of the first frame directly
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a4009fb9be87..1e413a9326aa 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -61,11 +61,17 @@ jiffies_64 = jiffies;
61 . = ALIGN(HPAGE_SIZE); \ 61 . = ALIGN(HPAGE_SIZE); \
62 __end_rodata_hpage_align = .; 62 __end_rodata_hpage_align = .;
63 63
64#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
65#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
66
64#else 67#else
65 68
66#define X64_ALIGN_RODATA_BEGIN 69#define X64_ALIGN_RODATA_BEGIN
67#define X64_ALIGN_RODATA_END 70#define X64_ALIGN_RODATA_END
68 71
72#define ALIGN_ENTRY_TEXT_BEGIN
73#define ALIGN_ENTRY_TEXT_END
74
69#endif 75#endif
70 76
71PHDRS { 77PHDRS {
@@ -102,11 +108,22 @@ SECTIONS
102 CPUIDLE_TEXT 108 CPUIDLE_TEXT
103 LOCK_TEXT 109 LOCK_TEXT
104 KPROBES_TEXT 110 KPROBES_TEXT
111 ALIGN_ENTRY_TEXT_BEGIN
105 ENTRY_TEXT 112 ENTRY_TEXT
106 IRQENTRY_TEXT 113 IRQENTRY_TEXT
114 ALIGN_ENTRY_TEXT_END
107 SOFTIRQENTRY_TEXT 115 SOFTIRQENTRY_TEXT
108 *(.fixup) 116 *(.fixup)
109 *(.gnu.warning) 117 *(.gnu.warning)
118
119#ifdef CONFIG_X86_64
120 . = ALIGN(PAGE_SIZE);
121 _entry_trampoline = .;
122 *(.entry_trampoline)
123 . = ALIGN(PAGE_SIZE);
124 ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
125#endif
126
110 /* End of text section */ 127 /* End of text section */
111 _etext = .; 128 _etext = .;
112 } :text = 0x9090 129 } :text = 0x9090
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index abe74f779f9d..b514b2b2845a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2390,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
2390} 2390}
2391 2391
2392static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, 2392static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2393 u64 cr0, u64 cr4) 2393 u64 cr0, u64 cr3, u64 cr4)
2394{ 2394{
2395 int bad; 2395 int bad;
2396 u64 pcid;
2397
2398 /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
2399 pcid = 0;
2400 if (cr4 & X86_CR4_PCIDE) {
2401 pcid = cr3 & 0xfff;
2402 cr3 &= ~0xfff;
2403 }
2404
2405 bad = ctxt->ops->set_cr(ctxt, 3, cr3);
2406 if (bad)
2407 return X86EMUL_UNHANDLEABLE;
2396 2408
2397 /* 2409 /*
2398 * First enable PAE, long mode needs it before CR0.PG = 1 is set. 2410 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2411,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2411 bad = ctxt->ops->set_cr(ctxt, 4, cr4); 2423 bad = ctxt->ops->set_cr(ctxt, 4, cr4);
2412 if (bad) 2424 if (bad)
2413 return X86EMUL_UNHANDLEABLE; 2425 return X86EMUL_UNHANDLEABLE;
2426 if (pcid) {
2427 bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
2428 if (bad)
2429 return X86EMUL_UNHANDLEABLE;
2430 }
2431
2414 } 2432 }
2415 2433
2416 return X86EMUL_CONTINUE; 2434 return X86EMUL_CONTINUE;
@@ -2421,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2421 struct desc_struct desc; 2439 struct desc_struct desc;
2422 struct desc_ptr dt; 2440 struct desc_ptr dt;
2423 u16 selector; 2441 u16 selector;
2424 u32 val, cr0, cr4; 2442 u32 val, cr0, cr3, cr4;
2425 int i; 2443 int i;
2426 2444
2427 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); 2445 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
2428 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8)); 2446 cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
2429 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; 2447 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
2430 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); 2448 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
2431 2449
@@ -2467,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2467 2485
2468 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); 2486 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
2469 2487
2470 return rsm_enter_protected_mode(ctxt, cr0, cr4); 2488 return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2471} 2489}
2472 2490
2473static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) 2491static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2474{ 2492{
2475 struct desc_struct desc; 2493 struct desc_struct desc;
2476 struct desc_ptr dt; 2494 struct desc_ptr dt;
2477 u64 val, cr0, cr4; 2495 u64 val, cr0, cr3, cr4;
2478 u32 base3; 2496 u32 base3;
2479 u16 selector; 2497 u16 selector;
2480 int i, r; 2498 int i, r;
@@ -2491,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2491 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); 2509 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2492 2510
2493 cr0 = GET_SMSTATE(u64, smbase, 0x7f58); 2511 cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
2494 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50)); 2512 cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
2495 cr4 = GET_SMSTATE(u64, smbase, 0x7f48); 2513 cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
2496 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); 2514 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
2497 val = GET_SMSTATE(u64, smbase, 0x7ed0); 2515 val = GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2519,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2519 dt.address = GET_SMSTATE(u64, smbase, 0x7e68); 2537 dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
2520 ctxt->ops->set_gdt(ctxt, &dt); 2538 ctxt->ops->set_gdt(ctxt, &dt);
2521 2539
2522 r = rsm_enter_protected_mode(ctxt, cr0, cr4); 2540 r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2523 if (r != X86EMUL_CONTINUE) 2541 if (r != X86EMUL_CONTINUE)
2524 return r; 2542 return r;
2525 2543
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e5e66e5c6640..c4deb1f34faa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
3395 spin_lock(&vcpu->kvm->mmu_lock); 3395 spin_lock(&vcpu->kvm->mmu_lock);
3396 if(make_mmu_pages_available(vcpu) < 0) { 3396 if(make_mmu_pages_available(vcpu) < 0) {
3397 spin_unlock(&vcpu->kvm->mmu_lock); 3397 spin_unlock(&vcpu->kvm->mmu_lock);
3398 return 1; 3398 return -ENOSPC;
3399 } 3399 }
3400 sp = kvm_mmu_get_page(vcpu, 0, 0, 3400 sp = kvm_mmu_get_page(vcpu, 0, 0,
3401 vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL); 3401 vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
3410 spin_lock(&vcpu->kvm->mmu_lock); 3410 spin_lock(&vcpu->kvm->mmu_lock);
3411 if (make_mmu_pages_available(vcpu) < 0) { 3411 if (make_mmu_pages_available(vcpu) < 0) {
3412 spin_unlock(&vcpu->kvm->mmu_lock); 3412 spin_unlock(&vcpu->kvm->mmu_lock);
3413 return 1; 3413 return -ENOSPC;
3414 } 3414 }
3415 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), 3415 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
3416 i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); 3416 i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3450 spin_lock(&vcpu->kvm->mmu_lock); 3450 spin_lock(&vcpu->kvm->mmu_lock);
3451 if (make_mmu_pages_available(vcpu) < 0) { 3451 if (make_mmu_pages_available(vcpu) < 0) {
3452 spin_unlock(&vcpu->kvm->mmu_lock); 3452 spin_unlock(&vcpu->kvm->mmu_lock);
3453 return 1; 3453 return -ENOSPC;
3454 } 3454 }
3455 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 3455 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
3456 vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL); 3456 vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3487 spin_lock(&vcpu->kvm->mmu_lock); 3487 spin_lock(&vcpu->kvm->mmu_lock);
3488 if (make_mmu_pages_available(vcpu) < 0) { 3488 if (make_mmu_pages_available(vcpu) < 0) {
3489 spin_unlock(&vcpu->kvm->mmu_lock); 3489 spin_unlock(&vcpu->kvm->mmu_lock);
3490 return 1; 3490 return -ENOSPC;
3491 } 3491 }
3492 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 3492 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
3493 0, ACC_ALL); 3493 0, ACC_ALL);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8eba631c4dbd..023afa0c8887 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2302,7 +2302,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2302 * processors. See 22.2.4. 2302 * processors. See 22.2.4.
2303 */ 2303 */
2304 vmcs_writel(HOST_TR_BASE, 2304 vmcs_writel(HOST_TR_BASE,
2305 (unsigned long)this_cpu_ptr(&cpu_tss)); 2305 (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
2306 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ 2306 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
2307 2307
2308 /* 2308 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index faf843c9b916..1cec2c62a0b0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4384,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
4384 addr, n, v)) 4384 addr, n, v))
4385 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) 4385 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
4386 break; 4386 break;
4387 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); 4387 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
4388 handled += n; 4388 handled += n;
4389 addr += n; 4389 addr += n;
4390 len -= n; 4390 len -= n;
@@ -4643,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
4643{ 4643{
4644 if (vcpu->mmio_read_completed) { 4644 if (vcpu->mmio_read_completed) {
4645 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, 4645 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4646 vcpu->mmio_fragments[0].gpa, *(u64 *)val); 4646 vcpu->mmio_fragments[0].gpa, val);
4647 vcpu->mmio_read_completed = 0; 4647 vcpu->mmio_read_completed = 0;
4648 return 1; 4648 return 1;
4649 } 4649 }
@@ -4665,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4665 4665
4666static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) 4666static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
4667{ 4667{
4668 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); 4668 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
4669 return vcpu_mmio_write(vcpu, gpa, bytes, val); 4669 return vcpu_mmio_write(vcpu, gpa, bytes, val);
4670} 4670}
4671 4671
4672static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, 4672static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4673 void *val, int bytes) 4673 void *val, int bytes)
4674{ 4674{
4675 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); 4675 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
4676 return X86EMUL_IO_NEEDED; 4676 return X86EMUL_IO_NEEDED;
4677} 4677}
4678 4678
@@ -7264,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
7264 7264
7265int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 7265int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7266{ 7266{
7267 struct fpu *fpu = &current->thread.fpu;
7268 int r; 7267 int r;
7269 7268
7270 fpu__initialize(fpu);
7271
7272 kvm_sigset_activate(vcpu); 7269 kvm_sigset_activate(vcpu);
7273 7270
7271 kvm_load_guest_fpu(vcpu);
7272
7274 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { 7273 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
7275 if (kvm_run->immediate_exit) { 7274 if (kvm_run->immediate_exit) {
7276 r = -EINTR; 7275 r = -EINTR;
@@ -7296,14 +7295,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7296 } 7295 }
7297 } 7296 }
7298 7297
7299 kvm_load_guest_fpu(vcpu);
7300
7301 if (unlikely(vcpu->arch.complete_userspace_io)) { 7298 if (unlikely(vcpu->arch.complete_userspace_io)) {
7302 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; 7299 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
7303 vcpu->arch.complete_userspace_io = NULL; 7300 vcpu->arch.complete_userspace_io = NULL;
7304 r = cui(vcpu); 7301 r = cui(vcpu);
7305 if (r <= 0) 7302 if (r <= 0)
7306 goto out_fpu; 7303 goto out;
7307 } else 7304 } else
7308 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); 7305 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
7309 7306
@@ -7312,9 +7309,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7312 else 7309 else
7313 r = vcpu_run(vcpu); 7310 r = vcpu_run(vcpu);
7314 7311
7315out_fpu:
7316 kvm_put_guest_fpu(vcpu);
7317out: 7312out:
7313 kvm_put_guest_fpu(vcpu);
7318 post_kvm_run_save(vcpu); 7314 post_kvm_run_save(vcpu);
7319 kvm_sigset_deactivate(vcpu); 7315 kvm_sigset_deactivate(vcpu);
7320 7316
@@ -7384,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
7384#endif 7380#endif
7385 7381
7386 kvm_rip_write(vcpu, regs->rip); 7382 kvm_rip_write(vcpu, regs->rip);
7387 kvm_set_rflags(vcpu, regs->rflags); 7383 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
7388 7384
7389 vcpu->arch.exception.pending = false; 7385 vcpu->arch.exception.pending = false;
7390 7386
@@ -7498,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
7498} 7494}
7499EXPORT_SYMBOL_GPL(kvm_task_switch); 7495EXPORT_SYMBOL_GPL(kvm_task_switch);
7500 7496
7497int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
7498{
7499 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) {
7500 /*
7501 * When EFER.LME and CR0.PG are set, the processor is in
7502 * 64-bit mode (though maybe in a 32-bit code segment).
7503 * CR4.PAE and EFER.LMA must be set.
7504 */
7505 if (!(sregs->cr4 & X86_CR4_PAE_BIT)
7506 || !(sregs->efer & EFER_LMA))
7507 return -EINVAL;
7508 } else {
7509 /*
7510 * Not in 64-bit mode: EFER.LMA is clear and the code
7511 * segment cannot be 64-bit.
7512 */
7513 if (sregs->efer & EFER_LMA || sregs->cs.l)
7514 return -EINVAL;
7515 }
7516
7517 return 0;
7518}
7519
7501int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 7520int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
7502 struct kvm_sregs *sregs) 7521 struct kvm_sregs *sregs)
7503{ 7522{
@@ -7510,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
7510 (sregs->cr4 & X86_CR4_OSXSAVE)) 7529 (sregs->cr4 & X86_CR4_OSXSAVE))
7511 return -EINVAL; 7530 return -EINVAL;
7512 7531
7532 if (kvm_valid_sregs(vcpu, sregs))
7533 return -EINVAL;
7534
7513 apic_base_msr.data = sregs->apic_base; 7535 apic_base_msr.data = sregs->apic_base;
7514 apic_base_msr.host_initiated = true; 7536 apic_base_msr.host_initiated = true;
7515 if (kvm_set_apic_base(vcpu, &apic_base_msr)) 7537 if (kvm_set_apic_base(vcpu, &apic_base_msr))
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 553f8fd23cc4..4846eff7e4c8 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
107 delay = min_t(u64, MWAITX_MAX_LOOPS, loops); 107 delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
108 108
109 /* 109 /*
110 * Use cpu_tss as a cacheline-aligned, seldomly 110 * Use cpu_tss_rw as a cacheline-aligned, seldomly
111 * accessed per-cpu variable as the monitor target. 111 * accessed per-cpu variable as the monitor target.
112 */ 112 */
113 __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); 113 __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
114 114
115 /* 115 /*
116 * AMD, like Intel, supports the EAX hint and EAX=0xf 116 * AMD, like Intel, supports the EAX hint and EAX=0xf
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index c4d55919fac1..e0b85930dd77 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
607fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) 607fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
608fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) 608fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
609fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) 609fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
610ff: 610ff: UD0
611EndTable 611EndTable
612 612
613Table: 3-byte opcode 1 (0x0f 0x38) 613Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
7177e: vpermt2d/q Vx,Hx,Wx (66),(ev) 7177e: vpermt2d/q Vx,Hx,Wx (66),(ev)
7187f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 7187f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
71980: INVEPT Gy,Mdq (66) 71980: INVEPT Gy,Mdq (66)
72081: INVPID Gy,Mdq (66) 72081: INVVPID Gy,Mdq (66)
72182: INVPCID Gy,Mdq (66) 72182: INVPCID Gy,Mdq (66)
72283: vpmultishiftqb Vx,Hx,Wx (66),(ev) 72283: vpmultishiftqb Vx,Hx,Wx (66),(ev)
72388: vexpandps/d Vpd,Wpd (66),(ev) 72388: vexpandps/d Vpd,Wpd (66),(ev)
@@ -970,6 +970,15 @@ GrpTable: Grp9
970EndTable 970EndTable
971 971
972GrpTable: Grp10 972GrpTable: Grp10
973# all are UD1
9740: UD1
9751: UD1
9762: UD1
9773: UD1
9784: UD1
9795: UD1
9806: UD1
9817: UD1
973EndTable 982EndTable
974 983
975# Grp11A and Grp11B are expressed as Grp11 in Intel SDM 984# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 8e13b8cc6bed..27e9e90a8d35 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg
10endif 10endif
11 11
12obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 12obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
13 pat.o pgtable.o physaddr.o setup_nx.o tlb.o 13 pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
14 14
15# Make sure __phys_addr has no stackprotector 15# Make sure __phys_addr has no stackprotector
16nostackp := $(call cc-option, -fno-stack-protector) 16nostackp := $(call cc-option, -fno-stack-protector)
@@ -41,9 +41,10 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
41obj-$(CONFIG_ACPI_NUMA) += srat.o 41obj-$(CONFIG_ACPI_NUMA) += srat.o
42obj-$(CONFIG_NUMA_EMU) += numa_emulation.o 42obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
43 43
44obj-$(CONFIG_X86_INTEL_MPX) += mpx.o 44obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
45obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o 45obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
46obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o 46obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
47obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
47 48
48obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o 49obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
49obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o 50obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644
index 000000000000..b9283cc27622
--- /dev/null
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -0,0 +1,166 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#include <linux/spinlock.h>
4#include <linux/percpu.h>
5
6#include <asm/cpu_entry_area.h>
7#include <asm/pgtable.h>
8#include <asm/fixmap.h>
9#include <asm/desc.h>
10
11static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
12
13#ifdef CONFIG_X86_64
14static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
15 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
16#endif
17
18struct cpu_entry_area *get_cpu_entry_area(int cpu)
19{
20 unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
21 BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
22
23 return (struct cpu_entry_area *) va;
24}
25EXPORT_SYMBOL(get_cpu_entry_area);
26
27void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
28{
29 unsigned long va = (unsigned long) cea_vaddr;
30
31 set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
32}
33
34static void __init
35cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
36{
37 for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
38 cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
39}
40
41static void percpu_setup_debug_store(int cpu)
42{
43#ifdef CONFIG_CPU_SUP_INTEL
44 int npages;
45 void *cea;
46
47 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
48 return;
49
50 cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
51 npages = sizeof(struct debug_store) / PAGE_SIZE;
52 BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
53 cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
54 PAGE_KERNEL);
55
56 cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
57 /*
58 * Force the population of PMDs for not yet allocated per cpu
59 * memory like debug store buffers.
60 */
61 npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
62 for (; npages; npages--, cea += PAGE_SIZE)
63 cea_set_pte(cea, 0, PAGE_NONE);
64#endif
65}
66
67/* Setup the fixmap mappings only once per-processor */
68static void __init setup_cpu_entry_area(int cpu)
69{
70#ifdef CONFIG_X86_64
71 extern char _entry_trampoline[];
72
73 /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
74 pgprot_t gdt_prot = PAGE_KERNEL_RO;
75 pgprot_t tss_prot = PAGE_KERNEL_RO;
76#else
77 /*
78 * On native 32-bit systems, the GDT cannot be read-only because
79 * our double fault handler uses a task gate, and entering through
80 * a task gate needs to change an available TSS to busy. If the
81 * GDT is read-only, that will triple fault. The TSS cannot be
82 * read-only because the CPU writes to it on task switches.
83 *
84 * On Xen PV, the GDT must be read-only because the hypervisor
85 * requires it.
86 */
87 pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
88 PAGE_KERNEL_RO : PAGE_KERNEL;
89 pgprot_t tss_prot = PAGE_KERNEL;
90#endif
91
92 cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
93 gdt_prot);
94
95 cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
96 per_cpu_ptr(&entry_stack_storage, cpu), 1,
97 PAGE_KERNEL);
98
99 /*
100 * The Intel SDM says (Volume 3, 7.2.1):
101 *
102 * Avoid placing a page boundary in the part of the TSS that the
103 * processor reads during a task switch (the first 104 bytes). The
104 * processor may not correctly perform address translations if a
105 * boundary occurs in this area. During a task switch, the processor
106 * reads and writes into the first 104 bytes of each TSS (using
107 * contiguous physical addresses beginning with the physical address
108 * of the first byte of the TSS). So, after TSS access begins, if
109 * part of the 104 bytes is not physically contiguous, the processor
110 * will access incorrect information without generating a page-fault
111 * exception.
112 *
113 * There are also a lot of errata involving the TSS spanning a page
114 * boundary. Assert that we're not doing that.
115 */
116 BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
117 offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
118 BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
119 cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
120 &per_cpu(cpu_tss_rw, cpu),
121 sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
122
123#ifdef CONFIG_X86_32
124 per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
125#endif
126
127#ifdef CONFIG_X86_64
128 BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
129 BUILD_BUG_ON(sizeof(exception_stacks) !=
130 sizeof(((struct cpu_entry_area *)0)->exception_stacks));
131 cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
132 &per_cpu(exception_stacks, cpu),
133 sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
134
135 cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
136 __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
137#endif
138 percpu_setup_debug_store(cpu);
139}
140
141static __init void setup_cpu_entry_area_ptes(void)
142{
143#ifdef CONFIG_X86_32
144 unsigned long start, end;
145
146 BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
147 BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
148
149 start = CPU_ENTRY_AREA_BASE;
150 end = start + CPU_ENTRY_AREA_MAP_SIZE;
151
152 /* Careful here: start + PMD_SIZE might wrap around */
153 for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
154 populate_extra_pte(start);
155#endif
156}
157
158void __init setup_cpu_entry_areas(void)
159{
160 unsigned int cpu;
161
162 setup_cpu_entry_area_ptes();
163
164 for_each_possible_cpu(cpu)
165 setup_cpu_entry_area(cpu);
166}
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index bfcffdf6c577..421f2664ffa0 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -5,7 +5,7 @@
5 5
6static int ptdump_show(struct seq_file *m, void *v) 6static int ptdump_show(struct seq_file *m, void *v)
7{ 7{
8 ptdump_walk_pgd_level(m, NULL); 8 ptdump_walk_pgd_level_debugfs(m, NULL, false);
9 return 0; 9 return 0;
10} 10}
11 11
@@ -22,21 +22,89 @@ static const struct file_operations ptdump_fops = {
22 .release = single_release, 22 .release = single_release,
23}; 23};
24 24
25static struct dentry *pe; 25static int ptdump_show_curknl(struct seq_file *m, void *v)
26{
27 if (current->mm->pgd) {
28 down_read(&current->mm->mmap_sem);
29 ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false);
30 up_read(&current->mm->mmap_sem);
31 }
32 return 0;
33}
34
35static int ptdump_open_curknl(struct inode *inode, struct file *filp)
36{
37 return single_open(filp, ptdump_show_curknl, NULL);
38}
39
40static const struct file_operations ptdump_curknl_fops = {
41 .owner = THIS_MODULE,
42 .open = ptdump_open_curknl,
43 .read = seq_read,
44 .llseek = seq_lseek,
45 .release = single_release,
46};
47
48#ifdef CONFIG_PAGE_TABLE_ISOLATION
49static struct dentry *pe_curusr;
50
51static int ptdump_show_curusr(struct seq_file *m, void *v)
52{
53 if (current->mm->pgd) {
54 down_read(&current->mm->mmap_sem);
55 ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true);
56 up_read(&current->mm->mmap_sem);
57 }
58 return 0;
59}
60
61static int ptdump_open_curusr(struct inode *inode, struct file *filp)
62{
63 return single_open(filp, ptdump_show_curusr, NULL);
64}
65
66static const struct file_operations ptdump_curusr_fops = {
67 .owner = THIS_MODULE,
68 .open = ptdump_open_curusr,
69 .read = seq_read,
70 .llseek = seq_lseek,
71 .release = single_release,
72};
73#endif
74
75static struct dentry *dir, *pe_knl, *pe_curknl;
26 76
27static int __init pt_dump_debug_init(void) 77static int __init pt_dump_debug_init(void)
28{ 78{
29 pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL, 79 dir = debugfs_create_dir("page_tables", NULL);
30 &ptdump_fops); 80 if (!dir)
31 if (!pe)
32 return -ENOMEM; 81 return -ENOMEM;
33 82
83 pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
84 &ptdump_fops);
85 if (!pe_knl)
86 goto err;
87
88 pe_curknl = debugfs_create_file("current_kernel", 0400,
89 dir, NULL, &ptdump_curknl_fops);
90 if (!pe_curknl)
91 goto err;
92
93#ifdef CONFIG_PAGE_TABLE_ISOLATION
94 pe_curusr = debugfs_create_file("current_user", 0400,
95 dir, NULL, &ptdump_curusr_fops);
96 if (!pe_curusr)
97 goto err;
98#endif
34 return 0; 99 return 0;
100err:
101 debugfs_remove_recursive(dir);
102 return -ENOMEM;
35} 103}
36 104
37static void __exit pt_dump_debug_exit(void) 105static void __exit pt_dump_debug_exit(void)
38{ 106{
39 debugfs_remove_recursive(pe); 107 debugfs_remove_recursive(dir);
40} 108}
41 109
42module_init(pt_dump_debug_init); 110module_init(pt_dump_debug_init);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 5e3ac6fe6c9e..f56902c1f04b 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -44,68 +44,97 @@ struct addr_marker {
44 unsigned long max_lines; 44 unsigned long max_lines;
45}; 45};
46 46
47/* indices for address_markers; keep sync'd w/ address_markers below */ 47/* Address space markers hints */
48
49#ifdef CONFIG_X86_64
50
48enum address_markers_idx { 51enum address_markers_idx {
49 USER_SPACE_NR = 0, 52 USER_SPACE_NR = 0,
50#ifdef CONFIG_X86_64
51 KERNEL_SPACE_NR, 53 KERNEL_SPACE_NR,
52 LOW_KERNEL_NR, 54 LOW_KERNEL_NR,
55#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
56 LDT_NR,
57#endif
53 VMALLOC_START_NR, 58 VMALLOC_START_NR,
54 VMEMMAP_START_NR, 59 VMEMMAP_START_NR,
55#ifdef CONFIG_KASAN 60#ifdef CONFIG_KASAN
56 KASAN_SHADOW_START_NR, 61 KASAN_SHADOW_START_NR,
57 KASAN_SHADOW_END_NR, 62 KASAN_SHADOW_END_NR,
58#endif 63#endif
59# ifdef CONFIG_X86_ESPFIX64 64#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
65 LDT_NR,
66#endif
67 CPU_ENTRY_AREA_NR,
68#ifdef CONFIG_X86_ESPFIX64
60 ESPFIX_START_NR, 69 ESPFIX_START_NR,
61# endif 70#endif
71#ifdef CONFIG_EFI
72 EFI_END_NR,
73#endif
62 HIGH_KERNEL_NR, 74 HIGH_KERNEL_NR,
63 MODULES_VADDR_NR, 75 MODULES_VADDR_NR,
64 MODULES_END_NR, 76 MODULES_END_NR,
65#else 77 FIXADDR_START_NR,
78 END_OF_SPACE_NR,
79};
80
81static struct addr_marker address_markers[] = {
82 [USER_SPACE_NR] = { 0, "User Space" },
83 [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" },
84 [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" },
85 [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
86 [VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
87#ifdef CONFIG_KASAN
88 [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
89 [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
90#endif
91#ifdef CONFIG_MODIFY_LDT_SYSCALL
92 [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" },
93#endif
94 [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
95#ifdef CONFIG_X86_ESPFIX64
96 [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
97#endif
98#ifdef CONFIG_EFI
99 [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" },
100#endif
101 [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" },
102 [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" },
103 [MODULES_END_NR] = { MODULES_END, "End Modules" },
104 [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" },
105 [END_OF_SPACE_NR] = { -1, NULL }
106};
107
108#else /* CONFIG_X86_64 */
109
110enum address_markers_idx {
111 USER_SPACE_NR = 0,
66 KERNEL_SPACE_NR, 112 KERNEL_SPACE_NR,
67 VMALLOC_START_NR, 113 VMALLOC_START_NR,
68 VMALLOC_END_NR, 114 VMALLOC_END_NR,
69# ifdef CONFIG_HIGHMEM 115#ifdef CONFIG_HIGHMEM
70 PKMAP_BASE_NR, 116 PKMAP_BASE_NR,
71# endif
72 FIXADDR_START_NR,
73#endif 117#endif
118 CPU_ENTRY_AREA_NR,
119 FIXADDR_START_NR,
120 END_OF_SPACE_NR,
74}; 121};
75 122
76/* Address space markers hints */
77static struct addr_marker address_markers[] = { 123static struct addr_marker address_markers[] = {
78 { 0, "User Space" }, 124 [USER_SPACE_NR] = { 0, "User Space" },
79#ifdef CONFIG_X86_64 125 [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" },
80 { 0x8000000000000000UL, "Kernel Space" }, 126 [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
81 { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, 127 [VMALLOC_END_NR] = { 0UL, "vmalloc() End" },
82 { 0/* VMALLOC_START */, "vmalloc() Area" }, 128#ifdef CONFIG_HIGHMEM
83 { 0/* VMEMMAP_START */, "Vmemmap" }, 129 [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
84#ifdef CONFIG_KASAN
85 { KASAN_SHADOW_START, "KASAN shadow" },
86 { KASAN_SHADOW_END, "KASAN shadow end" },
87#endif 130#endif
88# ifdef CONFIG_X86_ESPFIX64 131 [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" },
89 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, 132 [FIXADDR_START_NR] = { 0UL, "Fixmap area" },
90# endif 133 [END_OF_SPACE_NR] = { -1, NULL }
91# ifdef CONFIG_EFI
92 { EFI_VA_END, "EFI Runtime Services" },
93# endif
94 { __START_KERNEL_map, "High Kernel Mapping" },
95 { MODULES_VADDR, "Modules" },
96 { MODULES_END, "End Modules" },
97#else
98 { PAGE_OFFSET, "Kernel Mapping" },
99 { 0/* VMALLOC_START */, "vmalloc() Area" },
100 { 0/*VMALLOC_END*/, "vmalloc() End" },
101# ifdef CONFIG_HIGHMEM
102 { 0/*PKMAP_BASE*/, "Persistent kmap() Area" },
103# endif
104 { 0/*FIXADDR_START*/, "Fixmap Area" },
105#endif
106 { -1, NULL } /* End of list */
107}; 134};
108 135
136#endif /* !CONFIG_X86_64 */
137
109/* Multipliers for offsets within the PTEs */ 138/* Multipliers for offsets within the PTEs */
110#define PTE_LEVEL_MULT (PAGE_SIZE) 139#define PTE_LEVEL_MULT (PAGE_SIZE)
111#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) 140#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
@@ -140,7 +169,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
140 static const char * const level_name[] = 169 static const char * const level_name[] =
141 { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; 170 { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
142 171
143 if (!pgprot_val(prot)) { 172 if (!(pr & _PAGE_PRESENT)) {
144 /* Not present */ 173 /* Not present */
145 pt_dump_cont_printf(m, dmsg, " "); 174 pt_dump_cont_printf(m, dmsg, " ");
146 } else { 175 } else {
@@ -447,7 +476,7 @@ static inline bool is_hypervisor_range(int idx)
447} 476}
448 477
449static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, 478static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
450 bool checkwx) 479 bool checkwx, bool dmesg)
451{ 480{
452#ifdef CONFIG_X86_64 481#ifdef CONFIG_X86_64
453 pgd_t *start = (pgd_t *) &init_top_pgt; 482 pgd_t *start = (pgd_t *) &init_top_pgt;
@@ -460,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
460 489
461 if (pgd) { 490 if (pgd) {
462 start = pgd; 491 start = pgd;
463 st.to_dmesg = true; 492 st.to_dmesg = dmesg;
464 } 493 }
465 494
466 st.check_wx = checkwx; 495 st.check_wx = checkwx;
@@ -498,13 +527,37 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
498 527
499void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) 528void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
500{ 529{
501 ptdump_walk_pgd_level_core(m, pgd, false); 530 ptdump_walk_pgd_level_core(m, pgd, false, true);
531}
532
533void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
534{
535#ifdef CONFIG_PAGE_TABLE_ISOLATION
536 if (user && static_cpu_has(X86_FEATURE_PTI))
537 pgd = kernel_to_user_pgdp(pgd);
538#endif
539 ptdump_walk_pgd_level_core(m, pgd, false, false);
540}
541EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
542
543static void ptdump_walk_user_pgd_level_checkwx(void)
544{
545#ifdef CONFIG_PAGE_TABLE_ISOLATION
546 pgd_t *pgd = (pgd_t *) &init_top_pgt;
547
548 if (!static_cpu_has(X86_FEATURE_PTI))
549 return;
550
551 pr_info("x86/mm: Checking user space page tables\n");
552 pgd = kernel_to_user_pgdp(pgd);
553 ptdump_walk_pgd_level_core(NULL, pgd, true, false);
554#endif
502} 555}
503EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
504 556
505void ptdump_walk_pgd_level_checkwx(void) 557void ptdump_walk_pgd_level_checkwx(void)
506{ 558{
507 ptdump_walk_pgd_level_core(NULL, NULL, true); 559 ptdump_walk_pgd_level_core(NULL, NULL, true, false);
560 ptdump_walk_user_pgd_level_checkwx();
508} 561}
509 562
510static int __init pt_dump_init(void) 563static int __init pt_dump_init(void)
@@ -525,8 +578,8 @@ static int __init pt_dump_init(void)
525 address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; 578 address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
526# endif 579# endif
527 address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; 580 address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
581 address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
528#endif 582#endif
529
530 return 0; 583 return 0;
531} 584}
532__initcall(pt_dump_init); 585__initcall(pt_dump_init);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index febf6980e653..06fe3d51d385 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
860 if (!printk_ratelimit()) 860 if (!printk_ratelimit())
861 return; 861 return;
862 862
863 printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx", 863 printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
864 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 864 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
865 tsk->comm, task_pid_nr(tsk), address, 865 tsk->comm, task_pid_nr(tsk), address,
866 (void *)regs->ip, (void *)regs->sp, error_code); 866 (void *)regs->ip, (void *)regs->sp, error_code);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6fdf91ef130a..8ca324d07282 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -20,6 +20,7 @@
20#include <asm/kaslr.h> 20#include <asm/kaslr.h>
21#include <asm/hypervisor.h> 21#include <asm/hypervisor.h>
22#include <asm/cpufeature.h> 22#include <asm/cpufeature.h>
23#include <asm/pti.h>
23 24
24/* 25/*
25 * We need to define the tracepoints somewhere, and tlb.c 26 * We need to define the tracepoints somewhere, and tlb.c
@@ -160,6 +161,12 @@ struct map_range {
160 161
161static int page_size_mask; 162static int page_size_mask;
162 163
164static void enable_global_pages(void)
165{
166 if (!static_cpu_has(X86_FEATURE_PTI))
167 __supported_pte_mask |= _PAGE_GLOBAL;
168}
169
163static void __init probe_page_size_mask(void) 170static void __init probe_page_size_mask(void)
164{ 171{
165 /* 172 /*
@@ -177,11 +184,11 @@ static void __init probe_page_size_mask(void)
177 cr4_set_bits_and_update_boot(X86_CR4_PSE); 184 cr4_set_bits_and_update_boot(X86_CR4_PSE);
178 185
179 /* Enable PGE if available */ 186 /* Enable PGE if available */
187 __supported_pte_mask &= ~_PAGE_GLOBAL;
180 if (boot_cpu_has(X86_FEATURE_PGE)) { 188 if (boot_cpu_has(X86_FEATURE_PGE)) {
181 cr4_set_bits_and_update_boot(X86_CR4_PGE); 189 cr4_set_bits_and_update_boot(X86_CR4_PGE);
182 __supported_pte_mask |= _PAGE_GLOBAL; 190 enable_global_pages();
183 } else 191 }
184 __supported_pte_mask &= ~_PAGE_GLOBAL;
185 192
186 /* Enable 1 GB linear kernel mappings if available: */ 193 /* Enable 1 GB linear kernel mappings if available: */
187 if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) { 194 if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
@@ -194,34 +201,44 @@ static void __init probe_page_size_mask(void)
194 201
195static void setup_pcid(void) 202static void setup_pcid(void)
196{ 203{
197#ifdef CONFIG_X86_64 204 if (!IS_ENABLED(CONFIG_X86_64))
198 if (boot_cpu_has(X86_FEATURE_PCID)) { 205 return;
199 if (boot_cpu_has(X86_FEATURE_PGE)) { 206
200 /* 207 if (!boot_cpu_has(X86_FEATURE_PCID))
201 * This can't be cr4_set_bits_and_update_boot() -- 208 return;
202 * the trampoline code can't handle CR4.PCIDE and 209
203 * it wouldn't do any good anyway. Despite the name, 210 if (boot_cpu_has(X86_FEATURE_PGE)) {
204 * cr4_set_bits_and_update_boot() doesn't actually 211 /*
205 * cause the bits in question to remain set all the 212 * This can't be cr4_set_bits_and_update_boot() -- the
206 * way through the secondary boot asm. 213 * trampoline code can't handle CR4.PCIDE and it wouldn't
207 * 214 * do any good anyway. Despite the name,
208 * Instead, we brute-force it and set CR4.PCIDE 215 * cr4_set_bits_and_update_boot() doesn't actually cause
209 * manually in start_secondary(). 216 * the bits in question to remain set all the way through
210 */ 217 * the secondary boot asm.
211 cr4_set_bits(X86_CR4_PCIDE); 218 *
212 } else { 219 * Instead, we brute-force it and set CR4.PCIDE manually in
213 /* 220 * start_secondary().
214 * flush_tlb_all(), as currently implemented, won't 221 */
215 * work if PCID is on but PGE is not. Since that 222 cr4_set_bits(X86_CR4_PCIDE);
216 * combination doesn't exist on real hardware, there's 223
217 * no reason to try to fully support it, but it's 224 /*
218 * polite to avoid corrupting data if we're on 225 * INVPCID's single-context modes (2/3) only work if we set
219 * an improperly configured VM. 226 * X86_CR4_PCIDE, *and* we INVPCID support. It's unusable
220 */ 227 * on systems that have X86_CR4_PCIDE clear, or that have
221 setup_clear_cpu_cap(X86_FEATURE_PCID); 228 * no INVPCID support at all.
222 } 229 */
230 if (boot_cpu_has(X86_FEATURE_INVPCID))
231 setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
232 } else {
233 /*
234 * flush_tlb_all(), as currently implemented, won't work if
235 * PCID is on but PGE is not. Since that combination
236 * doesn't exist on real hardware, there's no reason to try
237 * to fully support it, but it's polite to avoid corrupting
238 * data if we're on an improperly configured VM.
239 */
240 setup_clear_cpu_cap(X86_FEATURE_PCID);
223 } 241 }
224#endif
225} 242}
226 243
227#ifdef CONFIG_X86_32 244#ifdef CONFIG_X86_32
@@ -622,6 +639,7 @@ void __init init_mem_mapping(void)
622{ 639{
623 unsigned long end; 640 unsigned long end;
624 641
642 pti_check_boottime_disable();
625 probe_page_size_mask(); 643 probe_page_size_mask();
626 setup_pcid(); 644 setup_pcid();
627 645
@@ -845,7 +863,7 @@ void __init zone_sizes_init(void)
845 free_area_init_nodes(max_zone_pfns); 863 free_area_init_nodes(max_zone_pfns);
846} 864}
847 865
848DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { 866__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
849 .loaded_mm = &init_mm, 867 .loaded_mm = &init_mm,
850 .next_asid = 1, 868 .next_asid = 1,
851 .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ 869 .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8a64a6f2848d..135c9a7898c7 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -50,6 +50,7 @@
50#include <asm/setup.h> 50#include <asm/setup.h>
51#include <asm/set_memory.h> 51#include <asm/set_memory.h>
52#include <asm/page_types.h> 52#include <asm/page_types.h>
53#include <asm/cpu_entry_area.h>
53#include <asm/init.h> 54#include <asm/init.h>
54 55
55#include "mm_internal.h" 56#include "mm_internal.h"
@@ -766,6 +767,7 @@ void __init mem_init(void)
766 mem_init_print_info(NULL); 767 mem_init_print_info(NULL);
767 printk(KERN_INFO "virtual kernel memory layout:\n" 768 printk(KERN_INFO "virtual kernel memory layout:\n"
768 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 769 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
770 " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n"
769#ifdef CONFIG_HIGHMEM 771#ifdef CONFIG_HIGHMEM
770 " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 772 " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
771#endif 773#endif
@@ -777,6 +779,10 @@ void __init mem_init(void)
777 FIXADDR_START, FIXADDR_TOP, 779 FIXADDR_START, FIXADDR_TOP,
778 (FIXADDR_TOP - FIXADDR_START) >> 10, 780 (FIXADDR_TOP - FIXADDR_START) >> 10,
779 781
782 CPU_ENTRY_AREA_BASE,
783 CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
784 CPU_ENTRY_AREA_MAP_SIZE >> 10,
785
780#ifdef CONFIG_HIGHMEM 786#ifdef CONFIG_HIGHMEM
781 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, 787 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
782 (LAST_PKMAP*PAGE_SIZE) >> 10, 788 (LAST_PKMAP*PAGE_SIZE) >> 10,
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 6e4573b1da34..c45b6ec5357b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -404,11 +404,11 @@ void iounmap(volatile void __iomem *addr)
404 return; 404 return;
405 } 405 }
406 406
407 mmiotrace_iounmap(addr);
408
407 addr = (volatile void __iomem *) 409 addr = (volatile void __iomem *)
408 (PAGE_MASK & (unsigned long __force)addr); 410 (PAGE_MASK & (unsigned long __force)addr);
409 411
410 mmiotrace_iounmap(addr);
411
412 /* Use the vm area unlocked, assuming the caller 412 /* Use the vm area unlocked, assuming the caller
413 ensures there isn't another iounmap for the same address 413 ensures there isn't another iounmap for the same address
414 in parallel. Reuse of the virtual address is prevented by 414 in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 99dfed6dfef8..47388f0c0e59 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -15,6 +15,7 @@
15#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
16#include <asm/sections.h> 16#include <asm/sections.h>
17#include <asm/pgtable.h> 17#include <asm/pgtable.h>
18#include <asm/cpu_entry_area.h>
18 19
19extern struct range pfn_mapped[E820_MAX_ENTRIES]; 20extern struct range pfn_mapped[E820_MAX_ENTRIES];
20 21
@@ -277,6 +278,7 @@ void __init kasan_early_init(void)
277void __init kasan_init(void) 278void __init kasan_init(void)
278{ 279{
279 int i; 280 int i;
281 void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
280 282
281#ifdef CONFIG_KASAN_INLINE 283#ifdef CONFIG_KASAN_INLINE
282 register_die_notifier(&kasan_die_notifier); 284 register_die_notifier(&kasan_die_notifier);
@@ -321,16 +323,33 @@ void __init kasan_init(void)
321 map_range(&pfn_mapped[i]); 323 map_range(&pfn_mapped[i]);
322 } 324 }
323 325
326 shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
327 shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
328 shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
329 PAGE_SIZE);
330
331 shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
332 CPU_ENTRY_AREA_MAP_SIZE);
333 shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
334 shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
335 PAGE_SIZE);
336
324 kasan_populate_zero_shadow( 337 kasan_populate_zero_shadow(
325 kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), 338 kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
326 kasan_mem_to_shadow((void *)__START_KERNEL_map)); 339 shadow_cpu_entry_begin);
340
341 kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
342 (unsigned long)shadow_cpu_entry_end, 0);
343
344 kasan_populate_zero_shadow(shadow_cpu_entry_end,
345 kasan_mem_to_shadow((void *)__START_KERNEL_map));
327 346
328 kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), 347 kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
329 (unsigned long)kasan_mem_to_shadow(_end), 348 (unsigned long)kasan_mem_to_shadow(_end),
330 early_pfn_to_nid(__pa(_stext))); 349 early_pfn_to_nid(__pa(_stext)));
331 350
332 kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), 351 kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
333 (void *)KASAN_SHADOW_END); 352 (void *)KASAN_SHADOW_END);
334 353
335 load_cr3(init_top_pgt); 354 load_cr3(init_top_pgt);
336 __flush_tlb_all(); 355 __flush_tlb_all();
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index c21c2ed04612..58477ec3d66d 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -435,17 +435,18 @@ int register_kmmio_probe(struct kmmio_probe *p)
435 unsigned long flags; 435 unsigned long flags;
436 int ret = 0; 436 int ret = 0;
437 unsigned long size = 0; 437 unsigned long size = 0;
438 unsigned long addr = p->addr & PAGE_MASK;
438 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); 439 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
439 unsigned int l; 440 unsigned int l;
440 pte_t *pte; 441 pte_t *pte;
441 442
442 spin_lock_irqsave(&kmmio_lock, flags); 443 spin_lock_irqsave(&kmmio_lock, flags);
443 if (get_kmmio_probe(p->addr)) { 444 if (get_kmmio_probe(addr)) {
444 ret = -EEXIST; 445 ret = -EEXIST;
445 goto out; 446 goto out;
446 } 447 }
447 448
448 pte = lookup_address(p->addr, &l); 449 pte = lookup_address(addr, &l);
449 if (!pte) { 450 if (!pte) {
450 ret = -EINVAL; 451 ret = -EINVAL;
451 goto out; 452 goto out;
@@ -454,7 +455,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
454 kmmio_count++; 455 kmmio_count++;
455 list_add_rcu(&p->list, &kmmio_probes); 456 list_add_rcu(&p->list, &kmmio_probes);
456 while (size < size_lim) { 457 while (size < size_lim) {
457 if (add_kmmio_fault_page(p->addr + size)) 458 if (add_kmmio_fault_page(addr + size))
458 pr_err("Unable to set page fault.\n"); 459 pr_err("Unable to set page fault.\n");
459 size += page_level_size(l); 460 size += page_level_size(l);
460 } 461 }
@@ -528,19 +529,20 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
528{ 529{
529 unsigned long flags; 530 unsigned long flags;
530 unsigned long size = 0; 531 unsigned long size = 0;
532 unsigned long addr = p->addr & PAGE_MASK;
531 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); 533 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
532 struct kmmio_fault_page *release_list = NULL; 534 struct kmmio_fault_page *release_list = NULL;
533 struct kmmio_delayed_release *drelease; 535 struct kmmio_delayed_release *drelease;
534 unsigned int l; 536 unsigned int l;
535 pte_t *pte; 537 pte_t *pte;
536 538
537 pte = lookup_address(p->addr, &l); 539 pte = lookup_address(addr, &l);
538 if (!pte) 540 if (!pte)
539 return; 541 return;
540 542
541 spin_lock_irqsave(&kmmio_lock, flags); 543 spin_lock_irqsave(&kmmio_lock, flags);
542 while (size < size_lim) { 544 while (size < size_lim) {
543 release_kmmio_fault_page(p->addr + size, &release_list); 545 release_kmmio_fault_page(addr + size, &release_list);
544 size += page_level_size(l); 546 size += page_level_size(l);
545 } 547 }
546 list_del_rcu(&p->list); 548 list_del_rcu(&p->list);
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index d9a9e9fc75dd..391b13402e40 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -405,13 +405,13 @@ bool sme_active(void)
405{ 405{
406 return sme_me_mask && !sev_enabled; 406 return sme_me_mask && !sev_enabled;
407} 407}
408EXPORT_SYMBOL_GPL(sme_active); 408EXPORT_SYMBOL(sme_active);
409 409
410bool sev_active(void) 410bool sev_active(void)
411{ 411{
412 return sme_me_mask && sev_enabled; 412 return sme_me_mask && sev_enabled;
413} 413}
414EXPORT_SYMBOL_GPL(sev_active); 414EXPORT_SYMBOL(sev_active);
415 415
416static const struct dma_map_ops sev_dma_ops = { 416static const struct dma_map_ops sev_dma_ops = {
417 .alloc = sev_alloc, 417 .alloc = sev_alloc,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 96d456a94b03..004abf9ebf12 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd)
355 kmem_cache_free(pgd_cache, pgd); 355 kmem_cache_free(pgd_cache, pgd);
356} 356}
357#else 357#else
358
358static inline pgd_t *_pgd_alloc(void) 359static inline pgd_t *_pgd_alloc(void)
359{ 360{
360 return (pgd_t *)__get_free_page(PGALLOC_GFP); 361 return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
361} 362}
362 363
363static inline void _pgd_free(pgd_t *pgd) 364static inline void _pgd_free(pgd_t *pgd)
364{ 365{
365 free_page((unsigned long)pgd); 366 free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
366} 367}
367#endif /* CONFIG_X86_PAE */ 368#endif /* CONFIG_X86_PAE */
368 369
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6b9bf023a700..c3c5274410a9 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -10,6 +10,7 @@
10#include <linux/pagemap.h> 10#include <linux/pagemap.h>
11#include <linux/spinlock.h> 11#include <linux/spinlock.h>
12 12
13#include <asm/cpu_entry_area.h>
13#include <asm/pgtable.h> 14#include <asm/pgtable.h>
14#include <asm/pgalloc.h> 15#include <asm/pgalloc.h>
15#include <asm/fixmap.h> 16#include <asm/fixmap.h>
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
new file mode 100644
index 000000000000..bce8aea65606
--- /dev/null
+++ b/arch/x86/mm/pti.c
@@ -0,0 +1,387 @@
1/*
2 * Copyright(c) 2017 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * This code is based in part on work published here:
14 *
15 * https://github.com/IAIK/KAISER
16 *
17 * The original work was written by and and signed off by for the Linux
18 * kernel by:
19 *
20 * Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
21 * Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
22 * Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
23 * Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
24 *
25 * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com>
26 * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and
27 * Andy Lutomirsky <luto@amacapital.net>
28 */
29#include <linux/kernel.h>
30#include <linux/errno.h>
31#include <linux/string.h>
32#include <linux/types.h>
33#include <linux/bug.h>
34#include <linux/init.h>
35#include <linux/spinlock.h>
36#include <linux/mm.h>
37#include <linux/uaccess.h>
38
39#include <asm/cpufeature.h>
40#include <asm/hypervisor.h>
41#include <asm/vsyscall.h>
42#include <asm/cmdline.h>
43#include <asm/pti.h>
44#include <asm/pgtable.h>
45#include <asm/pgalloc.h>
46#include <asm/tlbflush.h>
47#include <asm/desc.h>
48
49#undef pr_fmt
50#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
51
52/* Backporting helper */
53#ifndef __GFP_NOTRACK
54#define __GFP_NOTRACK 0
55#endif
56
57static void __init pti_print_if_insecure(const char *reason)
58{
59 if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
60 pr_info("%s\n", reason);
61}
62
63static void __init pti_print_if_secure(const char *reason)
64{
65 if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
66 pr_info("%s\n", reason);
67}
68
69void __init pti_check_boottime_disable(void)
70{
71 char arg[5];
72 int ret;
73
74 if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
75 pti_print_if_insecure("disabled on XEN PV.");
76 return;
77 }
78
79 ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
80 if (ret > 0) {
81 if (ret == 3 && !strncmp(arg, "off", 3)) {
82 pti_print_if_insecure("disabled on command line.");
83 return;
84 }
85 if (ret == 2 && !strncmp(arg, "on", 2)) {
86 pti_print_if_secure("force enabled on command line.");
87 goto enable;
88 }
89 if (ret == 4 && !strncmp(arg, "auto", 4))
90 goto autosel;
91 }
92
93 if (cmdline_find_option_bool(boot_command_line, "nopti")) {
94 pti_print_if_insecure("disabled on command line.");
95 return;
96 }
97
98autosel:
99 if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
100 return;
101enable:
102 setup_force_cpu_cap(X86_FEATURE_PTI);
103}
104
105pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
106{
107 /*
108 * Changes to the high (kernel) portion of the kernelmode page
109 * tables are not automatically propagated to the usermode tables.
110 *
111 * Users should keep in mind that, unlike the kernelmode tables,
112 * there is no vmalloc_fault equivalent for the usermode tables.
113 * Top-level entries added to init_mm's usermode pgd after boot
114 * will not be automatically propagated to other mms.
115 */
116 if (!pgdp_maps_userspace(pgdp))
117 return pgd;
118
119 /*
120 * The user page tables get the full PGD, accessible from
121 * userspace:
122 */
123 kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
124
125 /*
126 * If this is normal user memory, make it NX in the kernel
127 * pagetables so that, if we somehow screw up and return to
128 * usermode with the kernel CR3 loaded, we'll get a page fault
129 * instead of allowing user code to execute with the wrong CR3.
130 *
131 * As exceptions, we don't set NX if:
132 * - _PAGE_USER is not set. This could be an executable
133 * EFI runtime mapping or something similar, and the kernel
134 * may execute from it
135 * - we don't have NX support
136 * - we're clearing the PGD (i.e. the new pgd is not present).
137 */
138 if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
139 (__supported_pte_mask & _PAGE_NX))
140 pgd.pgd |= _PAGE_NX;
141
142 /* return the copy of the PGD we want the kernel to use: */
143 return pgd;
144}
145
146/*
147 * Walk the user copy of the page tables (optionally) trying to allocate
148 * page table pages on the way down.
149 *
150 * Returns a pointer to a P4D on success, or NULL on failure.
151 */
152static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
153{
154 pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
155 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
156
157 if (address < PAGE_OFFSET) {
158 WARN_ONCE(1, "attempt to walk user address\n");
159 return NULL;
160 }
161
162 if (pgd_none(*pgd)) {
163 unsigned long new_p4d_page = __get_free_page(gfp);
164 if (!new_p4d_page)
165 return NULL;
166
167 if (pgd_none(*pgd)) {
168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
169 new_p4d_page = 0;
170 }
171 if (new_p4d_page)
172 free_page(new_p4d_page);
173 }
174 BUILD_BUG_ON(pgd_large(*pgd) != 0);
175
176 return p4d_offset(pgd, address);
177}
178
179/*
180 * Walk the user copy of the page tables (optionally) trying to allocate
181 * page table pages on the way down.
182 *
183 * Returns a pointer to a PMD on success, or NULL on failure.
184 */
185static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
186{
187 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
188 p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
189 pud_t *pud;
190
191 BUILD_BUG_ON(p4d_large(*p4d) != 0);
192 if (p4d_none(*p4d)) {
193 unsigned long new_pud_page = __get_free_page(gfp);
194 if (!new_pud_page)
195 return NULL;
196
197 if (p4d_none(*p4d)) {
198 set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
199 new_pud_page = 0;
200 }
201 if (new_pud_page)
202 free_page(new_pud_page);
203 }
204
205 pud = pud_offset(p4d, address);
206 /* The user page tables do not use large mappings: */
207 if (pud_large(*pud)) {
208 WARN_ON(1);
209 return NULL;
210 }
211 if (pud_none(*pud)) {
212 unsigned long new_pmd_page = __get_free_page(gfp);
213 if (!new_pmd_page)
214 return NULL;
215
216 if (pud_none(*pud)) {
217 set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
218 new_pmd_page = 0;
219 }
220 if (new_pmd_page)
221 free_page(new_pmd_page);
222 }
223
224 return pmd_offset(pud, address);
225}
226
227#ifdef CONFIG_X86_VSYSCALL_EMULATION
228/*
229 * Walk the shadow copy of the page tables (optionally) trying to allocate
230 * page table pages on the way down. Does not support large pages.
231 *
232 * Note: this is only used when mapping *new* kernel data into the
233 * user/shadow page tables. It is never used for userspace data.
234 *
235 * Returns a pointer to a PTE on success, or NULL on failure.
236 */
237static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
238{
239 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
240 pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
241 pte_t *pte;
242
243 /* We can't do anything sensible if we hit a large mapping. */
244 if (pmd_large(*pmd)) {
245 WARN_ON(1);
246 return NULL;
247 }
248
249 if (pmd_none(*pmd)) {
250 unsigned long new_pte_page = __get_free_page(gfp);
251 if (!new_pte_page)
252 return NULL;
253
254 if (pmd_none(*pmd)) {
255 set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
256 new_pte_page = 0;
257 }
258 if (new_pte_page)
259 free_page(new_pte_page);
260 }
261
262 pte = pte_offset_kernel(pmd, address);
263 if (pte_flags(*pte) & _PAGE_USER) {
264 WARN_ONCE(1, "attempt to walk to user pte\n");
265 return NULL;
266 }
267 return pte;
268}
269
270static void __init pti_setup_vsyscall(void)
271{
272 pte_t *pte, *target_pte;
273 unsigned int level;
274
275 pte = lookup_address(VSYSCALL_ADDR, &level);
276 if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
277 return;
278
279 target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
280 if (WARN_ON(!target_pte))
281 return;
282
283 *target_pte = *pte;
284 set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
285}
286#else
287static void __init pti_setup_vsyscall(void) { }
288#endif
289
290static void __init
291pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
292{
293 unsigned long addr;
294
295 /*
296 * Clone the populated PMDs which cover start to end. These PMD areas
297 * can have holes.
298 */
299 for (addr = start; addr < end; addr += PMD_SIZE) {
300 pmd_t *pmd, *target_pmd;
301 pgd_t *pgd;
302 p4d_t *p4d;
303 pud_t *pud;
304
305 pgd = pgd_offset_k(addr);
306 if (WARN_ON(pgd_none(*pgd)))
307 return;
308 p4d = p4d_offset(pgd, addr);
309 if (WARN_ON(p4d_none(*p4d)))
310 return;
311 pud = pud_offset(p4d, addr);
312 if (pud_none(*pud))
313 continue;
314 pmd = pmd_offset(pud, addr);
315 if (pmd_none(*pmd))
316 continue;
317
318 target_pmd = pti_user_pagetable_walk_pmd(addr);
319 if (WARN_ON(!target_pmd))
320 return;
321
322 /*
323 * Copy the PMD. That is, the kernelmode and usermode
324 * tables will share the last-level page tables of this
325 * address range
326 */
327 *target_pmd = pmd_clear_flags(*pmd, clear);
328 }
329}
330
331/*
332 * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
333 * next-level entry on 5-level systems.
334 */
335static void __init pti_clone_p4d(unsigned long addr)
336{
337 p4d_t *kernel_p4d, *user_p4d;
338 pgd_t *kernel_pgd;
339
340 user_p4d = pti_user_pagetable_walk_p4d(addr);
341 kernel_pgd = pgd_offset_k(addr);
342 kernel_p4d = p4d_offset(kernel_pgd, addr);
343 *user_p4d = *kernel_p4d;
344}
345
346/*
347 * Clone the CPU_ENTRY_AREA into the user space visible page table.
348 */
349static void __init pti_clone_user_shared(void)
350{
351 pti_clone_p4d(CPU_ENTRY_AREA_BASE);
352}
353
354/*
355 * Clone the ESPFIX P4D into the user space visinble page table
356 */
357static void __init pti_setup_espfix64(void)
358{
359#ifdef CONFIG_X86_ESPFIX64
360 pti_clone_p4d(ESPFIX_BASE_ADDR);
361#endif
362}
363
364/*
365 * Clone the populated PMDs of the entry and irqentry text and force it RO.
366 */
367static void __init pti_clone_entry_text(void)
368{
369 pti_clone_pmds((unsigned long) __entry_text_start,
370 (unsigned long) __irqentry_text_end, _PAGE_RW);
371}
372
373/*
374 * Initialize kernel page table isolation
375 */
376void __init pti_init(void)
377{
378 if (!static_cpu_has(X86_FEATURE_PTI))
379 return;
380
381 pr_info("enabled\n");
382
383 pti_clone_user_shared();
384 pti_clone_entry_text();
385 pti_setup_espfix64();
386 pti_setup_vsyscall();
387}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 3118392cdf75..a1561957dccb 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,6 +28,38 @@
28 * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi 28 * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
29 */ 29 */
30 30
31/*
32 * We get here when we do something requiring a TLB invalidation
33 * but could not go invalidate all of the contexts. We do the
34 * necessary invalidation by clearing out the 'ctx_id' which
35 * forces a TLB flush when the context is loaded.
36 */
37void clear_asid_other(void)
38{
39 u16 asid;
40
41 /*
42 * This is only expected to be set if we have disabled
43 * kernel _PAGE_GLOBAL pages.
44 */
45 if (!static_cpu_has(X86_FEATURE_PTI)) {
46 WARN_ON_ONCE(1);
47 return;
48 }
49
50 for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
51 /* Do not need to flush the current asid */
52 if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
53 continue;
54 /*
55 * Make sure the next time we go to switch to
56 * this asid, we do a flush:
57 */
58 this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
59 }
60 this_cpu_write(cpu_tlbstate.invalidate_other, false);
61}
62
31atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); 63atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
32 64
33 65
@@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
42 return; 74 return;
43 } 75 }
44 76
77 if (this_cpu_read(cpu_tlbstate.invalidate_other))
78 clear_asid_other();
79
45 for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { 80 for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
46 if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) != 81 if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
47 next->context.ctx_id) 82 next->context.ctx_id)
@@ -65,6 +100,25 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
65 *need_flush = true; 100 *need_flush = true;
66} 101}
67 102
103static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
104{
105 unsigned long new_mm_cr3;
106
107 if (need_flush) {
108 invalidate_user_asid(new_asid);
109 new_mm_cr3 = build_cr3(pgdir, new_asid);
110 } else {
111 new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
112 }
113
114 /*
115 * Caution: many callers of this function expect
116 * that load_cr3() is serializing and orders TLB
117 * fills with respect to the mm_cpumask writes.
118 */
119 write_cr3(new_mm_cr3);
120}
121
68void leave_mm(int cpu) 122void leave_mm(int cpu)
69{ 123{
70 struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); 124 struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -128,7 +182,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
128 * isn't free. 182 * isn't free.
129 */ 183 */
130#ifdef CONFIG_DEBUG_VM 184#ifdef CONFIG_DEBUG_VM
131 if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) { 185 if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
132 /* 186 /*
133 * If we were to BUG here, we'd be very likely to kill 187 * If we were to BUG here, we'd be very likely to kill
134 * the system so hard that we don't see the call trace. 188 * the system so hard that we don't see the call trace.
@@ -195,7 +249,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
195 if (need_flush) { 249 if (need_flush) {
196 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); 250 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
197 this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); 251 this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
198 write_cr3(build_cr3(next, new_asid)); 252 load_new_mm_cr3(next->pgd, new_asid, true);
199 253
200 /* 254 /*
201 * NB: This gets called via leave_mm() in the idle path 255 * NB: This gets called via leave_mm() in the idle path
@@ -208,7 +262,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
208 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); 262 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
209 } else { 263 } else {
210 /* The new ASID is already up to date. */ 264 /* The new ASID is already up to date. */
211 write_cr3(build_cr3_noflush(next, new_asid)); 265 load_new_mm_cr3(next->pgd, new_asid, false);
212 266
213 /* See above wrt _rcuidle. */ 267 /* See above wrt _rcuidle. */
214 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); 268 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
@@ -288,7 +342,7 @@ void initialize_tlbstate_and_flush(void)
288 !(cr4_read_shadow() & X86_CR4_PCIDE)); 342 !(cr4_read_shadow() & X86_CR4_PCIDE));
289 343
290 /* Force ASID 0 and force a TLB flush. */ 344 /* Force ASID 0 and force a TLB flush. */
291 write_cr3(build_cr3(mm, 0)); 345 write_cr3(build_cr3(mm->pgd, 0));
292 346
293 /* Reinitialize tlbstate. */ 347 /* Reinitialize tlbstate. */
294 this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); 348 this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
@@ -551,7 +605,7 @@ static void do_kernel_range_flush(void *info)
551 605
552 /* flush range by one by one 'invlpg' */ 606 /* flush range by one by one 'invlpg' */
553 for (addr = f->start; addr < f->end; addr += PAGE_SIZE) 607 for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
554 __flush_tlb_single(addr); 608 __flush_tlb_one(addr);
555} 609}
556 610
557void flush_tlb_kernel_range(unsigned long start, unsigned long end) 611void flush_tlb_kernel_range(unsigned long start, unsigned long end)
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 1e996df687a3..e663d6bf1328 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -665,6 +665,16 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
665 unsigned i; 665 unsigned i;
666 u32 base, limit, high; 666 u32 base, limit, high;
667 struct resource *res, *conflict; 667 struct resource *res, *conflict;
668 struct pci_dev *other;
669
670 /* Check that we are the only device of that type */
671 other = pci_get_device(dev->vendor, dev->device, NULL);
672 if (other != dev ||
673 (other = pci_get_device(dev->vendor, dev->device, other))) {
674 /* This is a multi-socket system, don't touch it for now */
675 pci_dev_put(other);
676 return;
677 }
668 678
669 for (i = 0; i < 8; i++) { 679 for (i = 0; i < 8; i++) {
670 pci_read_config_dword(dev, AMD_141b_MMIO_BASE(i), &base); 680 pci_read_config_dword(dev, AMD_141b_MMIO_BASE(i), &base);
@@ -696,8 +706,13 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
696 res->end = 0xfd00000000ull - 1; 706 res->end = 0xfd00000000ull - 1;
697 707
698 /* Just grab the free area behind system memory for this */ 708 /* Just grab the free area behind system memory for this */
699 while ((conflict = request_resource_conflict(&iomem_resource, res))) 709 while ((conflict = request_resource_conflict(&iomem_resource, res))) {
710 if (conflict->end >= res->end) {
711 kfree(res);
712 return;
713 }
700 res->start = conflict->end + 1; 714 res->start = conflict->end + 1;
715 }
701 716
702 dev_info(&dev->dev, "adding root bus resource %pR\n", res); 717 dev_info(&dev->dev, "adding root bus resource %pR\n", res);
703 718
@@ -714,10 +729,10 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
714 729
715 pci_bus_add_resource(dev->bus, res, 0); 730 pci_bus_add_resource(dev->bus, res, 0);
716} 731}
717DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar); 732DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
718DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar); 733DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
719DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); 734DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
720DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); 735DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
721DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); 736DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
722 737
723#endif 738#endif
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 6a151ce70e86..d87ac96e37ed 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -196,6 +196,9 @@ static pgd_t *efi_pgd;
196 * because we want to avoid inserting EFI region mappings (EFI_VA_END 196 * because we want to avoid inserting EFI region mappings (EFI_VA_END
197 * to EFI_VA_START) into the standard kernel page tables. Everything 197 * to EFI_VA_START) into the standard kernel page tables. Everything
198 * else can be shared, see efi_sync_low_kernel_mappings(). 198 * else can be shared, see efi_sync_low_kernel_mappings().
199 *
200 * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
201 * allocation.
199 */ 202 */
200int __init efi_alloc_page_tables(void) 203int __init efi_alloc_page_tables(void)
201{ 204{
@@ -208,7 +211,7 @@ int __init efi_alloc_page_tables(void)
208 return 0; 211 return 0;
209 212
210 gfp_mask = GFP_KERNEL | __GFP_ZERO; 213 gfp_mask = GFP_KERNEL | __GFP_ZERO;
211 efi_pgd = (pgd_t *)__get_free_page(gfp_mask); 214 efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
212 if (!efi_pgd) 215 if (!efi_pgd)
213 return -ENOMEM; 216 return -ENOMEM;
214 217
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index f44c0bc95aa2..8538a6723171 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
299 local_flush_tlb(); 299 local_flush_tlb();
300 stat->d_alltlb++; 300 stat->d_alltlb++;
301 } else { 301 } else {
302 __flush_tlb_one(msg->address); 302 __flush_tlb_single(msg->address);
303 stat->d_onetlb++; 303 stat->d_onetlb++;
304 } 304 }
305 stat->d_requestee++; 305 stat->d_requestee++;
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 5f6fd860820a..e4cb9f4cde8a 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -128,7 +128,7 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
128 * on the specified blade to allow the sending of MSIs to the specified CPU. 128 * on the specified blade to allow the sending of MSIs to the specified CPU.
129 */ 129 */
130static int uv_domain_activate(struct irq_domain *domain, 130static int uv_domain_activate(struct irq_domain *domain,
131 struct irq_data *irq_data, bool early) 131 struct irq_data *irq_data, bool reserve)
132{ 132{
133 uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); 133 uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
134 return 0; 134 return 0;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 5191de14f4df..a7d966964c6f 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt)
82 /* 82 /*
83 * descriptor tables 83 * descriptor tables
84 */ 84 */
85#ifdef CONFIG_X86_32
86 store_idt(&ctxt->idt); 85 store_idt(&ctxt->idt);
87#else 86
88/* CONFIG_X86_64 */
89 store_idt((struct desc_ptr *)&ctxt->idt_limit);
90#endif
91 /* 87 /*
92 * We save it here, but restore it only in the hibernate case. 88 * We save it here, but restore it only in the hibernate case.
93 * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit 89 * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit
@@ -103,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt)
103 /* 99 /*
104 * segment registers 100 * segment registers
105 */ 101 */
106#ifdef CONFIG_X86_32 102#ifdef CONFIG_X86_32_LAZY_GS
107 savesegment(es, ctxt->es);
108 savesegment(fs, ctxt->fs);
109 savesegment(gs, ctxt->gs); 103 savesegment(gs, ctxt->gs);
110 savesegment(ss, ctxt->ss); 104#endif
111#else 105#ifdef CONFIG_X86_64
112/* CONFIG_X86_64 */ 106 savesegment(gs, ctxt->gs);
113 asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds)); 107 savesegment(fs, ctxt->fs);
114 asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); 108 savesegment(ds, ctxt->ds);
115 asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); 109 savesegment(es, ctxt->es);
116 asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
117 asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
118 110
119 rdmsrl(MSR_FS_BASE, ctxt->fs_base); 111 rdmsrl(MSR_FS_BASE, ctxt->fs_base);
120 rdmsrl(MSR_GS_BASE, ctxt->gs_base); 112 rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
121 rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); 113 rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
122 mtrr_save_fixed_ranges(NULL); 114 mtrr_save_fixed_ranges(NULL);
123 115
124 rdmsrl(MSR_EFER, ctxt->efer); 116 rdmsrl(MSR_EFER, ctxt->efer);
@@ -160,17 +152,19 @@ static void do_fpu_end(void)
160static void fix_processor_context(void) 152static void fix_processor_context(void)
161{ 153{
162 int cpu = smp_processor_id(); 154 int cpu = smp_processor_id();
163 struct tss_struct *t = &per_cpu(cpu_tss, cpu);
164#ifdef CONFIG_X86_64 155#ifdef CONFIG_X86_64
165 struct desc_struct *desc = get_cpu_gdt_rw(cpu); 156 struct desc_struct *desc = get_cpu_gdt_rw(cpu);
166 tss_desc tss; 157 tss_desc tss;
167#endif 158#endif
168 set_tss_desc(cpu, t); /* 159
169 * This just modifies memory; should not be 160 /*
170 * necessary. But... This is necessary, because 161 * We need to reload TR, which requires that we change the
171 * 386 hardware has concept of busy TSS or some 162 * GDT entry to indicate "available" first.
172 * similar stupidity. 163 *
173 */ 164 * XXX: This could probably all be replaced by a call to
165 * force_reload_TR().
166 */
167 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
174 168
175#ifdef CONFIG_X86_64 169#ifdef CONFIG_X86_64
176 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); 170 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
@@ -178,6 +172,9 @@ static void fix_processor_context(void)
178 write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS); 172 write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
179 173
180 syscall_init(); /* This sets MSR_*STAR and related */ 174 syscall_init(); /* This sets MSR_*STAR and related */
175#else
176 if (boot_cpu_has(X86_FEATURE_SEP))
177 enable_sep_cpu();
181#endif 178#endif
182 load_TR_desc(); /* This does ltr */ 179 load_TR_desc(); /* This does ltr */
183 load_mm_ldt(current->active_mm); /* This does lldt */ 180 load_mm_ldt(current->active_mm); /* This does lldt */
@@ -190,9 +187,12 @@ static void fix_processor_context(void)
190} 187}
191 188
192/** 189/**
193 * __restore_processor_state - restore the contents of CPU registers saved 190 * __restore_processor_state - restore the contents of CPU registers saved
194 * by __save_processor_state() 191 * by __save_processor_state()
195 * @ctxt - structure to load the registers contents from 192 * @ctxt - structure to load the registers contents from
193 *
194 * The asm code that gets us here will have restored a usable GDT, although
195 * it will be pointing to the wrong alias.
196 */ 196 */
197static void notrace __restore_processor_state(struct saved_context *ctxt) 197static void notrace __restore_processor_state(struct saved_context *ctxt)
198{ 198{
@@ -215,57 +215,50 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
215 write_cr2(ctxt->cr2); 215 write_cr2(ctxt->cr2);
216 write_cr0(ctxt->cr0); 216 write_cr0(ctxt->cr0);
217 217
218 /* Restore the IDT. */
219 load_idt(&ctxt->idt);
220
218 /* 221 /*
219 * now restore the descriptor tables to their proper values 222 * Just in case the asm code got us here with the SS, DS, or ES
220 * ltr is done i fix_processor_context(). 223 * out of sync with the GDT, update them.
221 */ 224 */
222#ifdef CONFIG_X86_32 225 loadsegment(ss, __KERNEL_DS);
223 load_idt(&ctxt->idt); 226 loadsegment(ds, __USER_DS);
224#else 227 loadsegment(es, __USER_DS);
225/* CONFIG_X86_64 */
226 load_idt((const struct desc_ptr *)&ctxt->idt_limit);
227#endif
228 228
229#ifdef CONFIG_X86_64
230 /* 229 /*
231 * We need GSBASE restored before percpu access can work. 230 * Restore percpu access. Percpu access can happen in exception
232 * percpu access can happen in exception handlers or in complicated 231 * handlers or in complicated helpers like load_gs_index().
233 * helpers like load_gs_index().
234 */ 232 */
235 wrmsrl(MSR_GS_BASE, ctxt->gs_base); 233#ifdef CONFIG_X86_64
234 wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
235#else
236 loadsegment(fs, __KERNEL_PERCPU);
237 loadsegment(gs, __KERNEL_STACK_CANARY);
236#endif 238#endif
237 239
240 /* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
238 fix_processor_context(); 241 fix_processor_context();
239 242
240 /* 243 /*
241 * Restore segment registers. This happens after restoring the GDT 244 * Now that we have descriptor tables fully restored and working
242 * and LDT, which happen in fix_processor_context(). 245 * exception handling, restore the usermode segments.
243 */ 246 */
244#ifdef CONFIG_X86_32 247#ifdef CONFIG_X86_64
248 loadsegment(ds, ctxt->es);
245 loadsegment(es, ctxt->es); 249 loadsegment(es, ctxt->es);
246 loadsegment(fs, ctxt->fs); 250 loadsegment(fs, ctxt->fs);
247 loadsegment(gs, ctxt->gs);
248 loadsegment(ss, ctxt->ss);
249
250 /*
251 * sysenter MSRs
252 */
253 if (boot_cpu_has(X86_FEATURE_SEP))
254 enable_sep_cpu();
255#else
256/* CONFIG_X86_64 */
257 asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
258 asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
259 asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
260 load_gs_index(ctxt->gs); 251 load_gs_index(ctxt->gs);
261 asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
262 252
263 /* 253 /*
264 * Restore FSBASE and user GSBASE after reloading the respective 254 * Restore FSBASE and GSBASE after restoring the selectors, since
265 * segment selectors. 255 * restoring the selectors clobbers the bases. Keep in mind
256 * that MSR_KERNEL_GS_BASE is horribly misnamed.
266 */ 257 */
267 wrmsrl(MSR_FS_BASE, ctxt->fs_base); 258 wrmsrl(MSR_FS_BASE, ctxt->fs_base);
268 wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); 259 wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
260#elif defined(CONFIG_X86_32_LAZY_GS)
261 loadsegment(gs, ctxt->gs);
269#endif 262#endif
270 263
271 do_fpu_end(); 264 do_fpu_end();
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 6b830d4cb4c8..de58533d3664 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -57,7 +57,7 @@ static u32 xen_apic_read(u32 reg)
57 return 0; 57 return 0;
58 58
59 if (reg == APIC_LVR) 59 if (reg == APIC_LVR)
60 return 0x10; 60 return 0x14;
61#ifdef CONFIG_X86_32 61#ifdef CONFIG_X86_32
62 if (reg == APIC_LDR) 62 if (reg == APIC_LDR)
63 return SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); 63 return SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d669e9d89001..c9081c6671f0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1,8 +1,12 @@
1#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
2#include <linux/bootmem.h>
3#endif
1#include <linux/cpu.h> 4#include <linux/cpu.h>
2#include <linux/kexec.h> 5#include <linux/kexec.h>
3 6
4#include <xen/features.h> 7#include <xen/features.h>
5#include <xen/page.h> 8#include <xen/page.h>
9#include <xen/interface/memory.h>
6 10
7#include <asm/xen/hypercall.h> 11#include <asm/xen/hypercall.h>
8#include <asm/xen/hypervisor.h> 12#include <asm/xen/hypervisor.h>
@@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num)
331} 335}
332EXPORT_SYMBOL(xen_arch_unregister_cpu); 336EXPORT_SYMBOL(xen_arch_unregister_cpu);
333#endif 337#endif
338
339#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
340void __init arch_xen_balloon_init(struct resource *hostmem_resource)
341{
342 struct xen_memory_map memmap;
343 int rc;
344 unsigned int i, last_guest_ram;
345 phys_addr_t max_addr = PFN_PHYS(max_pfn);
346 struct e820_table *xen_e820_table;
347 const struct e820_entry *entry;
348 struct resource *res;
349
350 if (!xen_initial_domain())
351 return;
352
353 xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
354 if (!xen_e820_table)
355 return;
356
357 memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
358 set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
359 rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
360 if (rc) {
361 pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
362 goto out;
363 }
364
365 last_guest_ram = 0;
366 for (i = 0; i < memmap.nr_entries; i++) {
367 if (xen_e820_table->entries[i].addr >= max_addr)
368 break;
369 if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
370 last_guest_ram = i;
371 }
372
373 entry = &xen_e820_table->entries[last_guest_ram];
374 if (max_addr >= entry->addr + entry->size)
375 goto out; /* No unallocated host RAM. */
376
377 hostmem_resource->start = max_addr;
378 hostmem_resource->end = entry->addr + entry->size;
379
380 /*
381 * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
382 * as unavailable. The rest of that region can be used for hotplug-based
383 * ballooning.
384 */
385 for (; i < memmap.nr_entries; i++) {
386 entry = &xen_e820_table->entries[i];
387
388 if (entry->type == E820_TYPE_RAM)
389 continue;
390
391 if (entry->addr >= hostmem_resource->end)
392 break;
393
394 res = kzalloc(sizeof(*res), GFP_KERNEL);
395 if (!res)
396 goto out;
397
398 res->name = "Unavailable host RAM";
399 res->start = entry->addr;
400 res->end = (entry->addr + entry->size < hostmem_resource->end) ?
401 entry->addr + entry->size : hostmem_resource->end;
402 rc = insert_resource(hostmem_resource, res);
403 if (rc) {
404 pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
405 __func__, res->start, res->end, rc);
406 kfree(res);
407 goto out;
408 }
409 }
410
411 out:
412 kfree(xen_e820_table);
413}
414#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index f2414c6c5e7c..c047f42552e1 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -88,6 +88,8 @@
88#include "multicalls.h" 88#include "multicalls.h"
89#include "pmu.h" 89#include "pmu.h"
90 90
91#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
92
91void *xen_initial_gdt; 93void *xen_initial_gdt;
92 94
93static int xen_cpu_up_prepare_pv(unsigned int cpu); 95static int xen_cpu_up_prepare_pv(unsigned int cpu);
@@ -826,7 +828,7 @@ static void xen_load_sp0(unsigned long sp0)
826 mcs = xen_mc_entry(0); 828 mcs = xen_mc_entry(0);
827 MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); 829 MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
828 xen_mc_issue(PARAVIRT_LAZY_CPU); 830 xen_mc_issue(PARAVIRT_LAZY_CPU);
829 this_cpu_write(cpu_tss.x86_tss.sp0, sp0); 831 this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
830} 832}
831 833
832void xen_set_iopl_mask(unsigned mask) 834void xen_set_iopl_mask(unsigned mask)
@@ -1258,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
1258 __userpte_alloc_gfp &= ~__GFP_HIGHMEM; 1260 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
1259 1261
1260 /* Work out if we support NX */ 1262 /* Work out if we support NX */
1263 get_cpu_cap(&boot_cpu_data);
1261 x86_configure_nx(); 1264 x86_configure_nx();
1262 1265
1263 /* Get mfn list */ 1266 /* Get mfn list */
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index fc048ec686e7..4d62c071b166 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1902,6 +1902,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1902 /* Graft it onto L4[511][510] */ 1902 /* Graft it onto L4[511][510] */
1903 copy_page(level2_kernel_pgt, l2); 1903 copy_page(level2_kernel_pgt, l2);
1904 1904
1905 /*
1906 * Zap execute permission from the ident map. Due to the sharing of
1907 * L1 entries we need to do this in the L2.
1908 */
1909 if (__supported_pte_mask & _PAGE_NX) {
1910 for (i = 0; i < PTRS_PER_PMD; ++i) {
1911 if (pmd_none(level2_ident_pgt[i]))
1912 continue;
1913 level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
1914 }
1915 }
1916
1905 /* Copy the initial P->M table mappings if necessary. */ 1917 /* Copy the initial P->M table mappings if necessary. */
1906 i = pgd_index(xen_start_info->mfn_list); 1918 i = pgd_index(xen_start_info->mfn_list);
1907 if (i && i < pgd_index(__START_KERNEL_map)) 1919 if (i && i < pgd_index(__START_KERNEL_map))
@@ -2261,7 +2273,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2261 2273
2262 switch (idx) { 2274 switch (idx) {
2263 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: 2275 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
2264 case FIX_RO_IDT:
2265#ifdef CONFIG_X86_32 2276#ifdef CONFIG_X86_32
2266 case FIX_WP_TEST: 2277 case FIX_WP_TEST:
2267# ifdef CONFIG_HIGHMEM 2278# ifdef CONFIG_HIGHMEM
@@ -2272,7 +2283,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2272#endif 2283#endif
2273 case FIX_TEXT_POKE0: 2284 case FIX_TEXT_POKE0:
2274 case FIX_TEXT_POKE1: 2285 case FIX_TEXT_POKE1:
2275 case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
2276 /* All local page mappings */ 2286 /* All local page mappings */
2277 pte = pfn_pte(phys, prot); 2287 pte = pfn_pte(phys, prot);
2278 break; 2288 break;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c114ca767b3b..6e0d2086eacb 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -808,7 +808,6 @@ char * __init xen_memory_setup(void)
808 addr = xen_e820_table.entries[0].addr; 808 addr = xen_e820_table.entries[0].addr;
809 size = xen_e820_table.entries[0].size; 809 size = xen_e820_table.entries[0].size;
810 while (i < xen_e820_table.nr_entries) { 810 while (i < xen_e820_table.nr_entries) {
811 bool discard = false;
812 811
813 chunk_size = size; 812 chunk_size = size;
814 type = xen_e820_table.entries[i].type; 813 type = xen_e820_table.entries[i].type;
@@ -824,11 +823,10 @@ char * __init xen_memory_setup(void)
824 xen_add_extra_mem(pfn_s, n_pfns); 823 xen_add_extra_mem(pfn_s, n_pfns);
825 xen_max_p2m_pfn = pfn_s + n_pfns; 824 xen_max_p2m_pfn = pfn_s + n_pfns;
826 } else 825 } else
827 discard = true; 826 type = E820_TYPE_UNUSABLE;
828 } 827 }
829 828
830 if (!discard) 829 xen_align_and_add_e820_region(addr, chunk_size, type);
831 xen_align_and_add_e820_region(addr, chunk_size, type);
832 830
833 addr += chunk_size; 831 addr += chunk_size;
834 size -= chunk_size; 832 size -= chunk_size;
diff --git a/block/bio.c b/block/bio.c
index 8bfdea58159b..9ef6cf3addb3 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
599 bio->bi_disk = bio_src->bi_disk; 599 bio->bi_disk = bio_src->bi_disk;
600 bio->bi_partno = bio_src->bi_partno; 600 bio->bi_partno = bio_src->bi_partno;
601 bio_set_flag(bio, BIO_CLONED); 601 bio_set_flag(bio, BIO_CLONED);
602 if (bio_flagged(bio_src, BIO_THROTTLED))
603 bio_set_flag(bio, BIO_THROTTLED);
602 bio->bi_opf = bio_src->bi_opf; 604 bio->bi_opf = bio_src->bi_opf;
603 bio->bi_write_hint = bio_src->bi_write_hint; 605 bio->bi_write_hint = bio_src->bi_write_hint;
604 bio->bi_iter = bio_src->bi_iter; 606 bio->bi_iter = bio_src->bi_iter;
diff --git a/block/blk-map.c b/block/blk-map.c
index b21f8e86f120..d3a94719f03f 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -12,22 +12,29 @@
12#include "blk.h" 12#include "blk.h"
13 13
14/* 14/*
15 * Append a bio to a passthrough request. Only works can be merged into 15 * Append a bio to a passthrough request. Only works if the bio can be merged
16 * the request based on the driver constraints. 16 * into the request based on the driver constraints.
17 */ 17 */
18int blk_rq_append_bio(struct request *rq, struct bio *bio) 18int blk_rq_append_bio(struct request *rq, struct bio **bio)
19{ 19{
20 blk_queue_bounce(rq->q, &bio); 20 struct bio *orig_bio = *bio;
21
22 blk_queue_bounce(rq->q, bio);
21 23
22 if (!rq->bio) { 24 if (!rq->bio) {
23 blk_rq_bio_prep(rq->q, rq, bio); 25 blk_rq_bio_prep(rq->q, rq, *bio);
24 } else { 26 } else {
25 if (!ll_back_merge_fn(rq->q, rq, bio)) 27 if (!ll_back_merge_fn(rq->q, rq, *bio)) {
28 if (orig_bio != *bio) {
29 bio_put(*bio);
30 *bio = orig_bio;
31 }
26 return -EINVAL; 32 return -EINVAL;
33 }
27 34
28 rq->biotail->bi_next = bio; 35 rq->biotail->bi_next = *bio;
29 rq->biotail = bio; 36 rq->biotail = *bio;
30 rq->__data_len += bio->bi_iter.bi_size; 37 rq->__data_len += (*bio)->bi_iter.bi_size;
31 } 38 }
32 39
33 return 0; 40 return 0;
@@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
73 * We link the bounce buffer in and could have to traverse it 80 * We link the bounce buffer in and could have to traverse it
74 * later so we have to get a ref to prevent it from being freed 81 * later so we have to get a ref to prevent it from being freed
75 */ 82 */
76 ret = blk_rq_append_bio(rq, bio); 83 ret = blk_rq_append_bio(rq, &bio);
77 bio_get(bio);
78 if (ret) { 84 if (ret) {
79 bio_endio(bio);
80 __blk_rq_unmap_user(orig_bio); 85 __blk_rq_unmap_user(orig_bio);
81 bio_put(bio);
82 return ret; 86 return ret;
83 } 87 }
88 bio_get(bio);
84 89
85 return 0; 90 return 0;
86} 91}
@@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
213 int reading = rq_data_dir(rq) == READ; 218 int reading = rq_data_dir(rq) == READ;
214 unsigned long addr = (unsigned long) kbuf; 219 unsigned long addr = (unsigned long) kbuf;
215 int do_copy = 0; 220 int do_copy = 0;
216 struct bio *bio; 221 struct bio *bio, *orig_bio;
217 int ret; 222 int ret;
218 223
219 if (len > (queue_max_hw_sectors(q) << 9)) 224 if (len > (queue_max_hw_sectors(q) << 9))
@@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
236 if (do_copy) 241 if (do_copy)
237 rq->rq_flags |= RQF_COPY_USER; 242 rq->rq_flags |= RQF_COPY_USER;
238 243
239 ret = blk_rq_append_bio(rq, bio); 244 orig_bio = bio;
245 ret = blk_rq_append_bio(rq, &bio);
240 if (unlikely(ret)) { 246 if (unlikely(ret)) {
241 /* request is too big */ 247 /* request is too big */
242 bio_put(bio); 248 bio_put(orig_bio);
243 return ret; 249 return ret;
244 } 250 }
245 251
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 825bc29767e6..d19f416d6101 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2226,13 +2226,7 @@ again:
2226out_unlock: 2226out_unlock:
2227 spin_unlock_irq(q->queue_lock); 2227 spin_unlock_irq(q->queue_lock);
2228out: 2228out:
2229 /* 2229 bio_set_flag(bio, BIO_THROTTLED);
2230 * As multiple blk-throtls may stack in the same issue path, we
2231 * don't want bios to leave with the flag set. Clear the flag if
2232 * being issued.
2233 */
2234 if (!throttled)
2235 bio_clear_flag(bio, BIO_THROTTLED);
2236 2230
2237#ifdef CONFIG_BLK_DEV_THROTTLING_LOW 2231#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
2238 if (throttled || !td->track_bio_latency) 2232 if (throttled || !td->track_bio_latency)
diff --git a/block/bounce.c b/block/bounce.c
index fceb1a96480b..1d05c422c932 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
200 unsigned i = 0; 200 unsigned i = 0;
201 bool bounce = false; 201 bool bounce = false;
202 int sectors = 0; 202 int sectors = 0;
203 bool passthrough = bio_is_passthrough(*bio_orig);
203 204
204 bio_for_each_segment(from, *bio_orig, iter) { 205 bio_for_each_segment(from, *bio_orig, iter) {
205 if (i++ < BIO_MAX_PAGES) 206 if (i++ < BIO_MAX_PAGES)
@@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
210 if (!bounce) 211 if (!bounce)
211 return; 212 return;
212 213
213 if (sectors < bio_sectors(*bio_orig)) { 214 if (!passthrough && sectors < bio_sectors(*bio_orig)) {
214 bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split); 215 bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
215 bio_chain(bio, *bio_orig); 216 bio_chain(bio, *bio_orig);
216 generic_make_request(*bio_orig); 217 generic_make_request(*bio_orig);
217 *bio_orig = bio; 218 *bio_orig = bio;
218 } 219 }
219 bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set); 220 bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
221 bounce_bio_set);
220 222
221 bio_for_each_segment_all(to, bio, i) { 223 bio_for_each_segment_all(to, bio, i) {
222 struct page *page = to->bv_page; 224 struct page *page = to->bv_page;
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b4df317c2916..f95c60774ce8 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -100,9 +100,13 @@ struct kyber_hctx_data {
100 unsigned int cur_domain; 100 unsigned int cur_domain;
101 unsigned int batching; 101 unsigned int batching;
102 wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS]; 102 wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
103 struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
103 atomic_t wait_index[KYBER_NUM_DOMAINS]; 104 atomic_t wait_index[KYBER_NUM_DOMAINS];
104}; 105};
105 106
107static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
108 void *key);
109
106static int rq_sched_domain(const struct request *rq) 110static int rq_sched_domain(const struct request *rq)
107{ 111{
108 unsigned int op = rq->cmd_flags; 112 unsigned int op = rq->cmd_flags;
@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
385 389
386 for (i = 0; i < KYBER_NUM_DOMAINS; i++) { 390 for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
387 INIT_LIST_HEAD(&khd->rqs[i]); 391 INIT_LIST_HEAD(&khd->rqs[i]);
392 init_waitqueue_func_entry(&khd->domain_wait[i],
393 kyber_domain_wake);
394 khd->domain_wait[i].private = hctx;
388 INIT_LIST_HEAD(&khd->domain_wait[i].entry); 395 INIT_LIST_HEAD(&khd->domain_wait[i].entry);
389 atomic_set(&khd->wait_index[i], 0); 396 atomic_set(&khd->wait_index[i], 0);
390 } 397 }
@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
524 int nr; 531 int nr;
525 532
526 nr = __sbitmap_queue_get(domain_tokens); 533 nr = __sbitmap_queue_get(domain_tokens);
527 if (nr >= 0)
528 return nr;
529 534
530 /* 535 /*
531 * If we failed to get a domain token, make sure the hardware queue is 536 * If we failed to get a domain token, make sure the hardware queue is
532 * run when one becomes available. Note that this is serialized on 537 * run when one becomes available. Note that this is serialized on
533 * khd->lock, but we still need to be careful about the waker. 538 * khd->lock, but we still need to be careful about the waker.
534 */ 539 */
535 if (list_empty_careful(&wait->entry)) { 540 if (nr < 0 && list_empty_careful(&wait->entry)) {
536 init_waitqueue_func_entry(wait, kyber_domain_wake);
537 wait->private = hctx;
538 ws = sbq_wait_ptr(domain_tokens, 541 ws = sbq_wait_ptr(domain_tokens,
539 &khd->wait_index[sched_domain]); 542 &khd->wait_index[sched_domain]);
543 khd->domain_ws[sched_domain] = ws;
540 add_wait_queue(&ws->wait, wait); 544 add_wait_queue(&ws->wait, wait);
541 545
542 /* 546 /*
543 * Try again in case a token was freed before we got on the wait 547 * Try again in case a token was freed before we got on the wait
544 * queue. The waker may have already removed the entry from the 548 * queue.
545 * wait queue, but list_del_init() is okay with that.
546 */ 549 */
547 nr = __sbitmap_queue_get(domain_tokens); 550 nr = __sbitmap_queue_get(domain_tokens);
548 if (nr >= 0) { 551 }
549 unsigned long flags;
550 552
551 spin_lock_irqsave(&ws->wait.lock, flags); 553 /*
552 list_del_init(&wait->entry); 554 * If we got a token while we were on the wait queue, remove ourselves
553 spin_unlock_irqrestore(&ws->wait.lock, flags); 555 * from the wait queue to ensure that all wake ups make forward
554 } 556 * progress. It's possible that the waker already deleted the entry
557 * between the !list_empty_careful() check and us grabbing the lock, but
558 * list_del_init() is okay with that.
559 */
560 if (nr >= 0 && !list_empty_careful(&wait->entry)) {
561 ws = khd->domain_ws[sched_domain];
562 spin_lock_irq(&ws->wait.lock);
563 list_del_init(&wait->entry);
564 spin_unlock_irq(&ws->wait.lock);
555 } 565 }
566
556 return nr; 567 return nr;
557} 568}
558 569
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 358749c38894..444a387df219 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -672,14 +672,15 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
672 } 672 }
673 673
674 tsgl = areq->tsgl; 674 tsgl = areq->tsgl;
675 for_each_sg(tsgl, sg, areq->tsgl_entries, i) { 675 if (tsgl) {
676 if (!sg_page(sg)) 676 for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
677 continue; 677 if (!sg_page(sg))
678 put_page(sg_page(sg)); 678 continue;
679 } 679 put_page(sg_page(sg));
680 }
680 681
681 if (areq->tsgl && areq->tsgl_entries)
682 sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl)); 682 sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl));
683 }
683} 684}
684EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls); 685EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls);
685 686
@@ -1137,12 +1138,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
1137 if (!af_alg_readable(sk)) 1138 if (!af_alg_readable(sk))
1138 break; 1139 break;
1139 1140
1140 if (!ctx->used) {
1141 err = af_alg_wait_for_data(sk, flags);
1142 if (err)
1143 return err;
1144 }
1145
1146 seglen = min_t(size_t, (maxsize - len), 1141 seglen = min_t(size_t, (maxsize - len),
1147 msg_data_left(msg)); 1142 msg_data_left(msg));
1148 1143
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 805f485ddf1b..ddcc45f77edd 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
111 size_t usedpages = 0; /* [in] RX bufs to be used from user */ 111 size_t usedpages = 0; /* [in] RX bufs to be used from user */
112 size_t processed = 0; /* [in] TX bufs to be consumed */ 112 size_t processed = 0; /* [in] TX bufs to be consumed */
113 113
114 if (!ctx->used) {
115 err = af_alg_wait_for_data(sk, flags);
116 if (err)
117 return err;
118 }
119
114 /* 120 /*
115 * Data length provided by caller via sendmsg/sendpage that has not 121 * Data length provided by caller via sendmsg/sendpage that has not
116 * yet been processed. 122 * yet been processed.
@@ -285,6 +291,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
285 /* AIO operation */ 291 /* AIO operation */
286 sock_hold(sk); 292 sock_hold(sk);
287 areq->iocb = msg->msg_iocb; 293 areq->iocb = msg->msg_iocb;
294
295 /* Remember output size that will be generated. */
296 areq->outlen = outlen;
297
288 aead_request_set_callback(&areq->cra_u.aead_req, 298 aead_request_set_callback(&areq->cra_u.aead_req,
289 CRYPTO_TFM_REQ_MAY_BACKLOG, 299 CRYPTO_TFM_REQ_MAY_BACKLOG,
290 af_alg_async_cb, areq); 300 af_alg_async_cb, areq);
@@ -292,12 +302,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
292 crypto_aead_decrypt(&areq->cra_u.aead_req); 302 crypto_aead_decrypt(&areq->cra_u.aead_req);
293 303
294 /* AIO operation in progress */ 304 /* AIO operation in progress */
295 if (err == -EINPROGRESS || err == -EBUSY) { 305 if (err == -EINPROGRESS || err == -EBUSY)
296 /* Remember output size that will be generated. */
297 areq->outlen = outlen;
298
299 return -EIOCBQUEUED; 306 return -EIOCBQUEUED;
300 }
301 307
302 sock_put(sk); 308 sock_put(sk);
303 } else { 309 } else {
@@ -503,6 +509,7 @@ static void aead_release(void *private)
503 struct aead_tfm *tfm = private; 509 struct aead_tfm *tfm = private;
504 510
505 crypto_free_aead(tfm->aead); 511 crypto_free_aead(tfm->aead);
512 crypto_put_default_null_skcipher2();
506 kfree(tfm); 513 kfree(tfm);
507} 514}
508 515
@@ -535,7 +542,6 @@ static void aead_sock_destruct(struct sock *sk)
535 unsigned int ivlen = crypto_aead_ivsize(tfm); 542 unsigned int ivlen = crypto_aead_ivsize(tfm);
536 543
537 af_alg_pull_tsgl(sk, ctx->used, NULL, 0); 544 af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
538 crypto_put_default_null_skcipher2();
539 sock_kzfree_s(sk, ctx->iv, ivlen); 545 sock_kzfree_s(sk, ctx->iv, ivlen);
540 sock_kfree_s(sk, ctx, ctx->len); 546 sock_kfree_s(sk, ctx, ctx->len);
541 af_alg_release_parent(sk); 547 af_alg_release_parent(sk);
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 30cff827dd8f..baef9bfccdda 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
72 int err = 0; 72 int err = 0;
73 size_t len = 0; 73 size_t len = 0;
74 74
75 if (!ctx->used) {
76 err = af_alg_wait_for_data(sk, flags);
77 if (err)
78 return err;
79 }
80
75 /* Allocate cipher request for current operation. */ 81 /* Allocate cipher request for current operation. */
76 areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) + 82 areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
77 crypto_skcipher_reqsize(tfm)); 83 crypto_skcipher_reqsize(tfm));
@@ -119,6 +125,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
119 /* AIO operation */ 125 /* AIO operation */
120 sock_hold(sk); 126 sock_hold(sk);
121 areq->iocb = msg->msg_iocb; 127 areq->iocb = msg->msg_iocb;
128
129 /* Remember output size that will be generated. */
130 areq->outlen = len;
131
122 skcipher_request_set_callback(&areq->cra_u.skcipher_req, 132 skcipher_request_set_callback(&areq->cra_u.skcipher_req,
123 CRYPTO_TFM_REQ_MAY_SLEEP, 133 CRYPTO_TFM_REQ_MAY_SLEEP,
124 af_alg_async_cb, areq); 134 af_alg_async_cb, areq);
@@ -127,12 +137,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
127 crypto_skcipher_decrypt(&areq->cra_u.skcipher_req); 137 crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
128 138
129 /* AIO operation in progress */ 139 /* AIO operation in progress */
130 if (err == -EINPROGRESS || err == -EBUSY) { 140 if (err == -EINPROGRESS || err == -EBUSY)
131 /* Remember output size that will be generated. */
132 areq->outlen = len;
133
134 return -EIOCBQUEUED; 141 return -EIOCBQUEUED;
135 }
136 142
137 sock_put(sk); 143 sock_put(sk);
138 } else { 144 } else {
diff --git a/crypto/hmac.c b/crypto/hmac.c
index 92871dc2a63e..e74730224f0a 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -195,11 +195,15 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
195 salg = shash_attr_alg(tb[1], 0, 0); 195 salg = shash_attr_alg(tb[1], 0, 0);
196 if (IS_ERR(salg)) 196 if (IS_ERR(salg))
197 return PTR_ERR(salg); 197 return PTR_ERR(salg);
198 alg = &salg->base;
198 199
200 /* The underlying hash algorithm must be unkeyed */
199 err = -EINVAL; 201 err = -EINVAL;
202 if (crypto_shash_alg_has_setkey(salg))
203 goto out_put_alg;
204
200 ds = salg->digestsize; 205 ds = salg->digestsize;
201 ss = salg->statesize; 206 ss = salg->statesize;
202 alg = &salg->base;
203 if (ds > alg->cra_blocksize || 207 if (ds > alg->cra_blocksize ||
204 ss < alg->cra_blocksize) 208 ss < alg->cra_blocksize)
205 goto out_put_alg; 209 goto out_put_alg;
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
index 4e6472658852..eca04d3729b3 100644
--- a/crypto/mcryptd.c
+++ b/crypto/mcryptd.c
@@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue,
81 pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); 81 pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
82 crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); 82 crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
83 INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); 83 INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
84 spin_lock_init(&cpu_queue->q_lock);
84 } 85 }
85 return 0; 86 return 0;
86} 87}
@@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
104 int cpu, err; 105 int cpu, err;
105 struct mcryptd_cpu_queue *cpu_queue; 106 struct mcryptd_cpu_queue *cpu_queue;
106 107
107 cpu = get_cpu(); 108 cpu_queue = raw_cpu_ptr(queue->cpu_queue);
108 cpu_queue = this_cpu_ptr(queue->cpu_queue); 109 spin_lock(&cpu_queue->q_lock);
109 rctx->tag.cpu = cpu; 110 cpu = smp_processor_id();
111 rctx->tag.cpu = smp_processor_id();
110 112
111 err = crypto_enqueue_request(&cpu_queue->queue, request); 113 err = crypto_enqueue_request(&cpu_queue->queue, request);
112 pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", 114 pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
113 cpu, cpu_queue, request); 115 cpu, cpu_queue, request);
116 spin_unlock(&cpu_queue->q_lock);
114 queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); 117 queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
115 put_cpu();
116 118
117 return err; 119 return err;
118} 120}
@@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct work_struct *work)
161 cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); 163 cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
162 i = 0; 164 i = 0;
163 while (i < MCRYPTD_BATCH || single_task_running()) { 165 while (i < MCRYPTD_BATCH || single_task_running()) {
164 /* 166
165 * preempt_disable/enable is used to prevent 167 spin_lock_bh(&cpu_queue->q_lock);
166 * being preempted by mcryptd_enqueue_request()
167 */
168 local_bh_disable();
169 preempt_disable();
170 backlog = crypto_get_backlog(&cpu_queue->queue); 168 backlog = crypto_get_backlog(&cpu_queue->queue);
171 req = crypto_dequeue_request(&cpu_queue->queue); 169 req = crypto_dequeue_request(&cpu_queue->queue);
172 preempt_enable(); 170 spin_unlock_bh(&cpu_queue->q_lock);
173 local_bh_enable();
174 171
175 if (!req) { 172 if (!req) {
176 mcryptd_opportunistic_flush(); 173 mcryptd_opportunistic_flush();
@@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct work_struct *work)
185 ++i; 182 ++i;
186 } 183 }
187 if (cpu_queue->queue.qlen) 184 if (cpu_queue->queue.qlen)
188 queue_work(kcrypto_wq, &cpu_queue->work); 185 queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
189} 186}
190 187
191void mcryptd_flusher(struct work_struct *__work) 188void mcryptd_flusher(struct work_struct *__work)
diff --git a/crypto/rsa_helper.c b/crypto/rsa_helper.c
index 0b66dc824606..cad395d70d78 100644
--- a/crypto/rsa_helper.c
+++ b/crypto/rsa_helper.c
@@ -30,7 +30,7 @@ int rsa_get_n(void *context, size_t hdrlen, unsigned char tag,
30 return -EINVAL; 30 return -EINVAL;
31 31
32 if (fips_enabled) { 32 if (fips_enabled) {
33 while (!*ptr && n_sz) { 33 while (n_sz && !*ptr) {
34 ptr++; 34 ptr++;
35 n_sz--; 35 n_sz--;
36 } 36 }
diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c
index f550b5d94630..d7da0eea5622 100644
--- a/crypto/salsa20_generic.c
+++ b/crypto/salsa20_generic.c
@@ -188,13 +188,6 @@ static int encrypt(struct blkcipher_desc *desc,
188 188
189 salsa20_ivsetup(ctx, walk.iv); 189 salsa20_ivsetup(ctx, walk.iv);
190 190
191 if (likely(walk.nbytes == nbytes))
192 {
193 salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
194 walk.src.virt.addr, nbytes);
195 return blkcipher_walk_done(desc, &walk, 0);
196 }
197
198 while (walk.nbytes >= 64) { 191 while (walk.nbytes >= 64) {
199 salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, 192 salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
200 walk.src.virt.addr, 193 walk.src.virt.addr,
diff --git a/crypto/shash.c b/crypto/shash.c
index 325a14da5827..e849d3ee2e27 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -25,11 +25,12 @@
25 25
26static const struct crypto_type crypto_shash_type; 26static const struct crypto_type crypto_shash_type;
27 27
28static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, 28int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
29 unsigned int keylen) 29 unsigned int keylen)
30{ 30{
31 return -ENOSYS; 31 return -ENOSYS;
32} 32}
33EXPORT_SYMBOL_GPL(shash_no_setkey);
33 34
34static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, 35static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
35 unsigned int keylen) 36 unsigned int keylen)
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 778e0ff42bfa..11af5fd6a443 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
449 449
450 walk->total = req->cryptlen; 450 walk->total = req->cryptlen;
451 walk->nbytes = 0; 451 walk->nbytes = 0;
452 walk->iv = req->iv;
453 walk->oiv = req->iv;
452 454
453 if (unlikely(!walk->total)) 455 if (unlikely(!walk->total))
454 return 0; 456 return 0;
@@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
456 scatterwalk_start(&walk->in, req->src); 458 scatterwalk_start(&walk->in, req->src);
457 scatterwalk_start(&walk->out, req->dst); 459 scatterwalk_start(&walk->out, req->dst);
458 460
459 walk->iv = req->iv;
460 walk->oiv = req->iv;
461
462 walk->flags &= ~SKCIPHER_WALK_SLEEP; 461 walk->flags &= ~SKCIPHER_WALK_SLEEP;
463 walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 462 walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
464 SKCIPHER_WALK_SLEEP : 0; 463 SKCIPHER_WALK_SLEEP : 0;
@@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
510 int err; 509 int err;
511 510
512 walk->nbytes = 0; 511 walk->nbytes = 0;
512 walk->iv = req->iv;
513 walk->oiv = req->iv;
513 514
514 if (unlikely(!walk->total)) 515 if (unlikely(!walk->total))
515 return 0; 516 return 0;
@@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
525 scatterwalk_done(&walk->in, 0, walk->total); 526 scatterwalk_done(&walk->in, 0, walk->total);
526 scatterwalk_done(&walk->out, 0, walk->total); 527 scatterwalk_done(&walk->out, 0, walk->total);
527 528
528 walk->iv = req->iv;
529 walk->oiv = req->iv;
530
531 if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) 529 if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
532 walk->flags |= SKCIPHER_WALK_SLEEP; 530 walk->flags |= SKCIPHER_WALK_SLEEP;
533 else 531 else
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 6742f6c68034..9bff853e85f3 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -1007,7 +1007,7 @@ skip:
1007 /* The record may be cleared by others, try read next record */ 1007 /* The record may be cleared by others, try read next record */
1008 if (len == -ENOENT) 1008 if (len == -ENOENT)
1009 goto skip; 1009 goto skip;
1010 else if (len < sizeof(*rcd)) { 1010 else if (len < 0 || len < sizeof(*rcd)) {
1011 rc = -EIO; 1011 rc = -EIO;
1012 goto out; 1012 goto out;
1013 } 1013 }
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 30e84cc600ae..06ea4749ebd9 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1171,7 +1171,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
1171 struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); 1171 struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
1172 struct cpc_register_resource *desired_reg; 1172 struct cpc_register_resource *desired_reg;
1173 int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); 1173 int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
1174 struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id]; 1174 struct cppc_pcc_data *pcc_ss_data;
1175 int ret = 0; 1175 int ret = 0;
1176 1176
1177 if (!cpc_desc || pcc_ss_id < 0) { 1177 if (!cpc_desc || pcc_ss_id < 0) {
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index e4ffaeec9ec2..a4c8ad98560d 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1138,7 +1138,7 @@ int acpi_subsys_thaw_noirq(struct device *dev)
1138 * skip all of the subsequent "thaw" callbacks for the device. 1138 * skip all of the subsequent "thaw" callbacks for the device.
1139 */ 1139 */
1140 if (dev_pm_smart_suspend_and_suspended(dev)) { 1140 if (dev_pm_smart_suspend_and_suspended(dev)) {
1141 dev->power.direct_complete = true; 1141 dev_pm_skip_next_resume_phases(dev);
1142 return 0; 1142 return 0;
1143 } 1143 }
1144 1144
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index ff2580e7611d..abeb4df4f22e 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
1670 dev_name(&adev_dimm->dev)); 1670 dev_name(&adev_dimm->dev));
1671 return -ENXIO; 1671 return -ENXIO;
1672 } 1672 }
1673 /*
1674 * Record nfit_mem for the notification path to track back to
1675 * the nfit sysfs attributes for this dimm device object.
1676 */
1677 dev_set_drvdata(&adev_dimm->dev, nfit_mem);
1673 1678
1674 /* 1679 /*
1675 * Until standardization materializes we need to consider 4 1680 * Until standardization materializes we need to consider 4
@@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data)
1752 sysfs_put(nfit_mem->flags_attr); 1757 sysfs_put(nfit_mem->flags_attr);
1753 nfit_mem->flags_attr = NULL; 1758 nfit_mem->flags_attr = NULL;
1754 } 1759 }
1755 if (adev_dimm) 1760 if (adev_dimm) {
1756 acpi_remove_notify_handler(adev_dimm->handle, 1761 acpi_remove_notify_handler(adev_dimm->handle,
1757 ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify); 1762 ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
1763 dev_set_drvdata(&adev_dimm->dev, NULL);
1764 }
1758 } 1765 }
1759 mutex_unlock(&acpi_desc->init_mutex); 1766 mutex_unlock(&acpi_desc->init_mutex);
1760} 1767}
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index a54a0f1f69a9..778caed570c6 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -482,7 +482,8 @@ enum binder_deferred_state {
482 * @tsk task_struct for group_leader of process 482 * @tsk task_struct for group_leader of process
483 * (invariant after initialized) 483 * (invariant after initialized)
484 * @files files_struct for process 484 * @files files_struct for process
485 * (invariant after initialized) 485 * (protected by @files_lock)
486 * @files_lock mutex to protect @files
486 * @deferred_work_node: element for binder_deferred_list 487 * @deferred_work_node: element for binder_deferred_list
487 * (protected by binder_deferred_lock) 488 * (protected by binder_deferred_lock)
488 * @deferred_work: bitmap of deferred work to perform 489 * @deferred_work: bitmap of deferred work to perform
@@ -530,6 +531,7 @@ struct binder_proc {
530 int pid; 531 int pid;
531 struct task_struct *tsk; 532 struct task_struct *tsk;
532 struct files_struct *files; 533 struct files_struct *files;
534 struct mutex files_lock;
533 struct hlist_node deferred_work_node; 535 struct hlist_node deferred_work_node;
534 int deferred_work; 536 int deferred_work;
535 bool is_dead; 537 bool is_dead;
@@ -924,20 +926,26 @@ static void binder_inc_node_tmpref_ilocked(struct binder_node *node);
924 926
925static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) 927static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
926{ 928{
927 struct files_struct *files = proc->files;
928 unsigned long rlim_cur; 929 unsigned long rlim_cur;
929 unsigned long irqs; 930 unsigned long irqs;
931 int ret;
930 932
931 if (files == NULL) 933 mutex_lock(&proc->files_lock);
932 return -ESRCH; 934 if (proc->files == NULL) {
933 935 ret = -ESRCH;
934 if (!lock_task_sighand(proc->tsk, &irqs)) 936 goto err;
935 return -EMFILE; 937 }
936 938 if (!lock_task_sighand(proc->tsk, &irqs)) {
939 ret = -EMFILE;
940 goto err;
941 }
937 rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); 942 rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
938 unlock_task_sighand(proc->tsk, &irqs); 943 unlock_task_sighand(proc->tsk, &irqs);
939 944
940 return __alloc_fd(files, 0, rlim_cur, flags); 945 ret = __alloc_fd(proc->files, 0, rlim_cur, flags);
946err:
947 mutex_unlock(&proc->files_lock);
948 return ret;
941} 949}
942 950
943/* 951/*
@@ -946,8 +954,10 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
946static void task_fd_install( 954static void task_fd_install(
947 struct binder_proc *proc, unsigned int fd, struct file *file) 955 struct binder_proc *proc, unsigned int fd, struct file *file)
948{ 956{
957 mutex_lock(&proc->files_lock);
949 if (proc->files) 958 if (proc->files)
950 __fd_install(proc->files, fd, file); 959 __fd_install(proc->files, fd, file);
960 mutex_unlock(&proc->files_lock);
951} 961}
952 962
953/* 963/*
@@ -957,9 +967,11 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
957{ 967{
958 int retval; 968 int retval;
959 969
960 if (proc->files == NULL) 970 mutex_lock(&proc->files_lock);
961 return -ESRCH; 971 if (proc->files == NULL) {
962 972 retval = -ESRCH;
973 goto err;
974 }
963 retval = __close_fd(proc->files, fd); 975 retval = __close_fd(proc->files, fd);
964 /* can't restart close syscall because file table entry was cleared */ 976 /* can't restart close syscall because file table entry was cleared */
965 if (unlikely(retval == -ERESTARTSYS || 977 if (unlikely(retval == -ERESTARTSYS ||
@@ -967,7 +979,8 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
967 retval == -ERESTARTNOHAND || 979 retval == -ERESTARTNOHAND ||
968 retval == -ERESTART_RESTARTBLOCK)) 980 retval == -ERESTART_RESTARTBLOCK))
969 retval = -EINTR; 981 retval = -EINTR;
970 982err:
983 mutex_unlock(&proc->files_lock);
971 return retval; 984 return retval;
972} 985}
973 986
@@ -4690,7 +4703,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
4690 ret = binder_alloc_mmap_handler(&proc->alloc, vma); 4703 ret = binder_alloc_mmap_handler(&proc->alloc, vma);
4691 if (ret) 4704 if (ret)
4692 return ret; 4705 return ret;
4706 mutex_lock(&proc->files_lock);
4693 proc->files = get_files_struct(current); 4707 proc->files = get_files_struct(current);
4708 mutex_unlock(&proc->files_lock);
4694 return 0; 4709 return 0;
4695 4710
4696err_bad_arg: 4711err_bad_arg:
@@ -4714,6 +4729,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
4714 spin_lock_init(&proc->outer_lock); 4729 spin_lock_init(&proc->outer_lock);
4715 get_task_struct(current->group_leader); 4730 get_task_struct(current->group_leader);
4716 proc->tsk = current->group_leader; 4731 proc->tsk = current->group_leader;
4732 mutex_init(&proc->files_lock);
4717 INIT_LIST_HEAD(&proc->todo); 4733 INIT_LIST_HEAD(&proc->todo);
4718 proc->default_priority = task_nice(current); 4734 proc->default_priority = task_nice(current);
4719 binder_dev = container_of(filp->private_data, struct binder_device, 4735 binder_dev = container_of(filp->private_data, struct binder_device,
@@ -4966,9 +4982,11 @@ static void binder_deferred_func(struct work_struct *work)
4966 4982
4967 files = NULL; 4983 files = NULL;
4968 if (defer & BINDER_DEFERRED_PUT_FILES) { 4984 if (defer & BINDER_DEFERRED_PUT_FILES) {
4985 mutex_lock(&proc->files_lock);
4969 files = proc->files; 4986 files = proc->files;
4970 if (files) 4987 if (files)
4971 proc->files = NULL; 4988 proc->files = NULL;
4989 mutex_unlock(&proc->files_lock);
4972 } 4990 }
4973 4991
4974 if (defer & BINDER_DEFERRED_FLUSH) 4992 if (defer & BINDER_DEFERRED_FLUSH)
diff --git a/drivers/ata/ahci_mtk.c b/drivers/ata/ahci_mtk.c
index 80854f71559a..0ae6971c2a4c 100644
--- a/drivers/ata/ahci_mtk.c
+++ b/drivers/ata/ahci_mtk.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * MeidaTek AHCI SATA driver 2 * MediaTek AHCI SATA driver
3 * 3 *
4 * Copyright (c) 2017 MediaTek Inc. 4 * Copyright (c) 2017 MediaTek Inc.
5 * Author: Ryder Lee <ryder.lee@mediatek.com> 5 * Author: Ryder Lee <ryder.lee@mediatek.com>
@@ -25,7 +25,7 @@
25#include <linux/reset.h> 25#include <linux/reset.h>
26#include "ahci.h" 26#include "ahci.h"
27 27
28#define DRV_NAME "ahci" 28#define DRV_NAME "ahci-mtk"
29 29
30#define SYS_CFG 0x14 30#define SYS_CFG 0x14
31#define SYS_CFG_SATA_MSK GENMASK(31, 30) 31#define SYS_CFG_SATA_MSK GENMASK(31, 30)
@@ -192,5 +192,5 @@ static struct platform_driver mtk_ahci_driver = {
192}; 192};
193module_platform_driver(mtk_ahci_driver); 193module_platform_driver(mtk_ahci_driver);
194 194
195MODULE_DESCRIPTION("MeidaTek SATA AHCI Driver"); 195MODULE_DESCRIPTION("MediaTek SATA AHCI Driver");
196MODULE_LICENSE("GPL v2"); 196MODULE_LICENSE("GPL v2");
diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c
index b6b0bf76dfc7..2685f28160f7 100644
--- a/drivers/ata/ahci_qoriq.c
+++ b/drivers/ata/ahci_qoriq.c
@@ -35,6 +35,8 @@
35 35
36/* port register default value */ 36/* port register default value */
37#define AHCI_PORT_PHY_1_CFG 0xa003fffe 37#define AHCI_PORT_PHY_1_CFG 0xa003fffe
38#define AHCI_PORT_PHY2_CFG 0x28184d1f
39#define AHCI_PORT_PHY3_CFG 0x0e081509
38#define AHCI_PORT_TRANS_CFG 0x08000029 40#define AHCI_PORT_TRANS_CFG 0x08000029
39#define AHCI_PORT_AXICC_CFG 0x3fffffff 41#define AHCI_PORT_AXICC_CFG 0x3fffffff
40 42
@@ -183,6 +185,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
183 writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2, 185 writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
184 qpriv->ecc_addr); 186 qpriv->ecc_addr);
185 writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); 187 writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
188 writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
189 writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
186 writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); 190 writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
187 if (qpriv->is_dmacoherent) 191 if (qpriv->is_dmacoherent)
188 writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); 192 writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
@@ -190,6 +194,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
190 194
191 case AHCI_LS2080A: 195 case AHCI_LS2080A:
192 writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); 196 writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
197 writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
198 writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
193 writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); 199 writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
194 if (qpriv->is_dmacoherent) 200 if (qpriv->is_dmacoherent)
195 writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); 201 writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
@@ -201,6 +207,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
201 writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2, 207 writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
202 qpriv->ecc_addr); 208 qpriv->ecc_addr);
203 writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); 209 writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
210 writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
211 writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
204 writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); 212 writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
205 if (qpriv->is_dmacoherent) 213 if (qpriv->is_dmacoherent)
206 writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); 214 writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
@@ -212,6 +220,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
212 writel(readl(qpriv->ecc_addr) | ECC_DIS_LS1088A, 220 writel(readl(qpriv->ecc_addr) | ECC_DIS_LS1088A,
213 qpriv->ecc_addr); 221 qpriv->ecc_addr);
214 writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); 222 writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
223 writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
224 writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
215 writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); 225 writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
216 if (qpriv->is_dmacoherent) 226 if (qpriv->is_dmacoherent)
217 writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); 227 writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
@@ -219,6 +229,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
219 229
220 case AHCI_LS2088A: 230 case AHCI_LS2088A:
221 writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); 231 writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
232 writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
233 writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
222 writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); 234 writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
223 if (qpriv->is_dmacoherent) 235 if (qpriv->is_dmacoherent)
224 writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); 236 writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 2a882929de4a..8193b38a1cae 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3082,13 +3082,19 @@ int sata_down_spd_limit(struct ata_link *link, u32 spd_limit)
3082 bit = fls(mask) - 1; 3082 bit = fls(mask) - 1;
3083 mask &= ~(1 << bit); 3083 mask &= ~(1 << bit);
3084 3084
3085 /* Mask off all speeds higher than or equal to the current 3085 /*
3086 * one. Force 1.5Gbps if current SPD is not available. 3086 * Mask off all speeds higher than or equal to the current one. At
3087 * this point, if current SPD is not available and we previously
3088 * recorded the link speed from SStatus, the driver has already
3089 * masked off the highest bit so mask should already be 1 or 0.
3090 * Otherwise, we should not force 1.5Gbps on a link where we have
3091 * not previously recorded speed from SStatus. Just return in this
3092 * case.
3087 */ 3093 */
3088 if (spd > 1) 3094 if (spd > 1)
3089 mask &= (1 << (spd - 1)) - 1; 3095 mask &= (1 << (spd - 1)) - 1;
3090 else 3096 else
3091 mask &= 1; 3097 return -EINVAL;
3092 3098
3093 /* were we already at the bottom? */ 3099 /* were we already at the bottom? */
3094 if (!mask) 3100 if (!mask)
diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c
index ffd8d33c6e0f..6db2e34bd52f 100644
--- a/drivers/ata/pata_pdc2027x.c
+++ b/drivers/ata/pata_pdc2027x.c
@@ -82,7 +82,7 @@ static int pdc2027x_set_mode(struct ata_link *link, struct ata_device **r_failed
82 * is issued to the device. However, if the controller clock is 133MHz, 82 * is issued to the device. However, if the controller clock is 133MHz,
83 * the following tables must be used. 83 * the following tables must be used.
84 */ 84 */
85static struct pdc2027x_pio_timing { 85static const struct pdc2027x_pio_timing {
86 u8 value0, value1, value2; 86 u8 value0, value1, value2;
87} pdc2027x_pio_timing_tbl[] = { 87} pdc2027x_pio_timing_tbl[] = {
88 { 0xfb, 0x2b, 0xac }, /* PIO mode 0 */ 88 { 0xfb, 0x2b, 0xac }, /* PIO mode 0 */
@@ -92,7 +92,7 @@ static struct pdc2027x_pio_timing {
92 { 0x23, 0x09, 0x25 }, /* PIO mode 4, IORDY on, Prefetch off */ 92 { 0x23, 0x09, 0x25 }, /* PIO mode 4, IORDY on, Prefetch off */
93}; 93};
94 94
95static struct pdc2027x_mdma_timing { 95static const struct pdc2027x_mdma_timing {
96 u8 value0, value1; 96 u8 value0, value1;
97} pdc2027x_mdma_timing_tbl[] = { 97} pdc2027x_mdma_timing_tbl[] = {
98 { 0xdf, 0x5f }, /* MDMA mode 0 */ 98 { 0xdf, 0x5f }, /* MDMA mode 0 */
@@ -100,7 +100,7 @@ static struct pdc2027x_mdma_timing {
100 { 0x69, 0x25 }, /* MDMA mode 2 */ 100 { 0x69, 0x25 }, /* MDMA mode 2 */
101}; 101};
102 102
103static struct pdc2027x_udma_timing { 103static const struct pdc2027x_udma_timing {
104 u8 value0, value1, value2; 104 u8 value0, value1, value2;
105} pdc2027x_udma_timing_tbl[] = { 105} pdc2027x_udma_timing_tbl[] = {
106 { 0x4a, 0x0f, 0xd5 }, /* UDMA mode 0 */ 106 { 0x4a, 0x0f, 0xd5 }, /* UDMA mode 0 */
@@ -649,7 +649,7 @@ static long pdc_detect_pll_input_clock(struct ata_host *host)
649 * @host: target ATA host 649 * @host: target ATA host
650 * @board_idx: board identifier 650 * @board_idx: board identifier
651 */ 651 */
652static int pdc_hardware_init(struct ata_host *host, unsigned int board_idx) 652static void pdc_hardware_init(struct ata_host *host, unsigned int board_idx)
653{ 653{
654 long pll_clock; 654 long pll_clock;
655 655
@@ -665,8 +665,6 @@ static int pdc_hardware_init(struct ata_host *host, unsigned int board_idx)
665 665
666 /* Adjust PLL control register */ 666 /* Adjust PLL control register */
667 pdc_adjust_pll(host, pll_clock, board_idx); 667 pdc_adjust_pll(host, pll_clock, board_idx);
668
669 return 0;
670} 668}
671 669
672/** 670/**
@@ -753,8 +751,7 @@ static int pdc2027x_init_one(struct pci_dev *pdev,
753 //pci_enable_intx(pdev); 751 //pci_enable_intx(pdev);
754 752
755 /* initialize adapter */ 753 /* initialize adapter */
756 if (pdc_hardware_init(host, board_idx) != 0) 754 pdc_hardware_init(host, board_idx);
757 return -EIO;
758 755
759 pci_set_master(pdev); 756 pci_set_master(pdev);
760 return ata_host_activate(host, pdev->irq, ata_bmdma_interrupt, 757 return ata_host_activate(host, pdev->irq, ata_bmdma_interrupt,
@@ -778,8 +775,7 @@ static int pdc2027x_reinit_one(struct pci_dev *pdev)
778 else 775 else
779 board_idx = PDC_UDMA_133; 776 board_idx = PDC_UDMA_133;
780 777
781 if (pdc_hardware_init(host, board_idx)) 778 pdc_hardware_init(host, board_idx);
782 return -EIO;
783 779
784 ata_host_resume(host); 780 ata_host_resume(host);
785 return 0; 781 return 0;
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index eb3af2739537..07532d83be0b 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -186,6 +186,11 @@ static void cache_associativity(struct cacheinfo *this_leaf)
186 this_leaf->ways_of_associativity = (size / nr_sets) / line_size; 186 this_leaf->ways_of_associativity = (size / nr_sets) / line_size;
187} 187}
188 188
189static bool cache_node_is_unified(struct cacheinfo *this_leaf)
190{
191 return of_property_read_bool(this_leaf->of_node, "cache-unified");
192}
193
189static void cache_of_override_properties(unsigned int cpu) 194static void cache_of_override_properties(unsigned int cpu)
190{ 195{
191 int index; 196 int index;
@@ -194,6 +199,14 @@ static void cache_of_override_properties(unsigned int cpu)
194 199
195 for (index = 0; index < cache_leaves(cpu); index++) { 200 for (index = 0; index < cache_leaves(cpu); index++) {
196 this_leaf = this_cpu_ci->info_list + index; 201 this_leaf = this_cpu_ci->info_list + index;
202 /*
203 * init_cache_level must setup the cache level correctly
204 * overriding the architecturally specified levels, so
205 * if type is NONE at this stage, it should be unified
206 */
207 if (this_leaf->type == CACHE_TYPE_NOCACHE &&
208 cache_node_is_unified(this_leaf))
209 this_leaf->type = CACHE_TYPE_UNIFIED;
197 cache_size(this_leaf); 210 cache_size(this_leaf);
198 cache_get_line_size(this_leaf); 211 cache_get_line_size(this_leaf);
199 cache_nr_sets(this_leaf); 212 cache_nr_sets(this_leaf);
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index db2f04415927..08744b572af6 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -526,6 +526,21 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd)
526/*------------------------- Resume routines -------------------------*/ 526/*------------------------- Resume routines -------------------------*/
527 527
528/** 528/**
529 * dev_pm_skip_next_resume_phases - Skip next system resume phases for device.
530 * @dev: Target device.
531 *
532 * Make the core skip the "early resume" and "resume" phases for @dev.
533 *
534 * This function can be called by middle-layer code during the "noirq" phase of
535 * system resume if necessary, but not by device drivers.
536 */
537void dev_pm_skip_next_resume_phases(struct device *dev)
538{
539 dev->power.is_late_suspended = false;
540 dev->power.is_suspended = false;
541}
542
543/**
529 * device_resume_noirq - Execute a "noirq resume" callback for given device. 544 * device_resume_noirq - Execute a "noirq resume" callback for given device.
530 * @dev: Device to handle. 545 * @dev: Device to handle.
531 * @state: PM transition of the system being carried out. 546 * @state: PM transition of the system being carried out.
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index ccb9975a97fa..ad0477ae820f 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps)
35struct nullb_cmd { 35struct nullb_cmd {
36 struct list_head list; 36 struct list_head list;
37 struct llist_node ll_list; 37 struct llist_node ll_list;
38 call_single_data_t csd; 38 struct __call_single_data csd;
39 struct request *rq; 39 struct request *rq;
40 struct bio *bio; 40 struct bio *bio;
41 unsigned int tag; 41 unsigned int tag;
42 blk_status_t error;
42 struct nullb_queue *nq; 43 struct nullb_queue *nq;
43 struct hrtimer timer; 44 struct hrtimer timer;
44 blk_status_t error;
45}; 45};
46 46
47struct nullb_queue { 47struct nullb_queue {
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 779869ed32b1..71fad747c0c7 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -199,6 +199,9 @@ struct smi_info {
199 /* The timer for this si. */ 199 /* The timer for this si. */
200 struct timer_list si_timer; 200 struct timer_list si_timer;
201 201
202 /* This flag is set, if the timer can be set */
203 bool timer_can_start;
204
202 /* This flag is set, if the timer is running (timer_pending() isn't enough) */ 205 /* This flag is set, if the timer is running (timer_pending() isn't enough) */
203 bool timer_running; 206 bool timer_running;
204 207
@@ -355,6 +358,8 @@ out:
355 358
356static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val) 359static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
357{ 360{
361 if (!smi_info->timer_can_start)
362 return;
358 smi_info->last_timeout_jiffies = jiffies; 363 smi_info->last_timeout_jiffies = jiffies;
359 mod_timer(&smi_info->si_timer, new_val); 364 mod_timer(&smi_info->si_timer, new_val);
360 smi_info->timer_running = true; 365 smi_info->timer_running = true;
@@ -374,21 +379,18 @@ static void start_new_msg(struct smi_info *smi_info, unsigned char *msg,
374 smi_info->handlers->start_transaction(smi_info->si_sm, msg, size); 379 smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
375} 380}
376 381
377static void start_check_enables(struct smi_info *smi_info, bool start_timer) 382static void start_check_enables(struct smi_info *smi_info)
378{ 383{
379 unsigned char msg[2]; 384 unsigned char msg[2];
380 385
381 msg[0] = (IPMI_NETFN_APP_REQUEST << 2); 386 msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
382 msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD; 387 msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
383 388
384 if (start_timer) 389 start_new_msg(smi_info, msg, 2);
385 start_new_msg(smi_info, msg, 2);
386 else
387 smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
388 smi_info->si_state = SI_CHECKING_ENABLES; 390 smi_info->si_state = SI_CHECKING_ENABLES;
389} 391}
390 392
391static void start_clear_flags(struct smi_info *smi_info, bool start_timer) 393static void start_clear_flags(struct smi_info *smi_info)
392{ 394{
393 unsigned char msg[3]; 395 unsigned char msg[3];
394 396
@@ -397,10 +399,7 @@ static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
397 msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD; 399 msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
398 msg[2] = WDT_PRE_TIMEOUT_INT; 400 msg[2] = WDT_PRE_TIMEOUT_INT;
399 401
400 if (start_timer) 402 start_new_msg(smi_info, msg, 3);
401 start_new_msg(smi_info, msg, 3);
402 else
403 smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
404 smi_info->si_state = SI_CLEARING_FLAGS; 403 smi_info->si_state = SI_CLEARING_FLAGS;
405} 404}
406 405
@@ -435,11 +434,11 @@ static void start_getting_events(struct smi_info *smi_info)
435 * Note that we cannot just use disable_irq(), since the interrupt may 434 * Note that we cannot just use disable_irq(), since the interrupt may
436 * be shared. 435 * be shared.
437 */ 436 */
438static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer) 437static inline bool disable_si_irq(struct smi_info *smi_info)
439{ 438{
440 if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) { 439 if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) {
441 smi_info->interrupt_disabled = true; 440 smi_info->interrupt_disabled = true;
442 start_check_enables(smi_info, start_timer); 441 start_check_enables(smi_info);
443 return true; 442 return true;
444 } 443 }
445 return false; 444 return false;
@@ -449,7 +448,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info)
449{ 448{
450 if ((smi_info->io.irq) && (smi_info->interrupt_disabled)) { 449 if ((smi_info->io.irq) && (smi_info->interrupt_disabled)) {
451 smi_info->interrupt_disabled = false; 450 smi_info->interrupt_disabled = false;
452 start_check_enables(smi_info, true); 451 start_check_enables(smi_info);
453 return true; 452 return true;
454 } 453 }
455 return false; 454 return false;
@@ -467,7 +466,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info)
467 466
468 msg = ipmi_alloc_smi_msg(); 467 msg = ipmi_alloc_smi_msg();
469 if (!msg) { 468 if (!msg) {
470 if (!disable_si_irq(smi_info, true)) 469 if (!disable_si_irq(smi_info))
471 smi_info->si_state = SI_NORMAL; 470 smi_info->si_state = SI_NORMAL;
472 } else if (enable_si_irq(smi_info)) { 471 } else if (enable_si_irq(smi_info)) {
473 ipmi_free_smi_msg(msg); 472 ipmi_free_smi_msg(msg);
@@ -483,7 +482,7 @@ retry:
483 /* Watchdog pre-timeout */ 482 /* Watchdog pre-timeout */
484 smi_inc_stat(smi_info, watchdog_pretimeouts); 483 smi_inc_stat(smi_info, watchdog_pretimeouts);
485 484
486 start_clear_flags(smi_info, true); 485 start_clear_flags(smi_info);
487 smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT; 486 smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
488 if (smi_info->intf) 487 if (smi_info->intf)
489 ipmi_smi_watchdog_pretimeout(smi_info->intf); 488 ipmi_smi_watchdog_pretimeout(smi_info->intf);
@@ -866,7 +865,7 @@ restart:
866 * disable and messages disabled. 865 * disable and messages disabled.
867 */ 866 */
868 if (smi_info->supports_event_msg_buff || smi_info->io.irq) { 867 if (smi_info->supports_event_msg_buff || smi_info->io.irq) {
869 start_check_enables(smi_info, true); 868 start_check_enables(smi_info);
870 } else { 869 } else {
871 smi_info->curr_msg = alloc_msg_handle_irq(smi_info); 870 smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
872 if (!smi_info->curr_msg) 871 if (!smi_info->curr_msg)
@@ -1167,6 +1166,7 @@ static int smi_start_processing(void *send_info,
1167 1166
1168 /* Set up the timer that drives the interface. */ 1167 /* Set up the timer that drives the interface. */
1169 timer_setup(&new_smi->si_timer, smi_timeout, 0); 1168 timer_setup(&new_smi->si_timer, smi_timeout, 0);
1169 new_smi->timer_can_start = true;
1170 smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES); 1170 smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);
1171 1171
1172 /* Try to claim any interrupts. */ 1172 /* Try to claim any interrupts. */
@@ -1936,10 +1936,12 @@ static void check_for_broken_irqs(struct smi_info *smi_info)
1936 check_set_rcv_irq(smi_info); 1936 check_set_rcv_irq(smi_info);
1937} 1937}
1938 1938
1939static inline void wait_for_timer_and_thread(struct smi_info *smi_info) 1939static inline void stop_timer_and_thread(struct smi_info *smi_info)
1940{ 1940{
1941 if (smi_info->thread != NULL) 1941 if (smi_info->thread != NULL)
1942 kthread_stop(smi_info->thread); 1942 kthread_stop(smi_info->thread);
1943
1944 smi_info->timer_can_start = false;
1943 if (smi_info->timer_running) 1945 if (smi_info->timer_running)
1944 del_timer_sync(&smi_info->si_timer); 1946 del_timer_sync(&smi_info->si_timer);
1945} 1947}
@@ -2152,7 +2154,7 @@ static int try_smi_init(struct smi_info *new_smi)
2152 * Start clearing the flags before we enable interrupts or the 2154 * Start clearing the flags before we enable interrupts or the
2153 * timer to avoid racing with the timer. 2155 * timer to avoid racing with the timer.
2154 */ 2156 */
2155 start_clear_flags(new_smi, false); 2157 start_clear_flags(new_smi);
2156 2158
2157 /* 2159 /*
2158 * IRQ is defined to be set when non-zero. req_events will 2160 * IRQ is defined to be set when non-zero. req_events will
@@ -2238,7 +2240,7 @@ out_err_remove_attrs:
2238 dev_set_drvdata(new_smi->io.dev, NULL); 2240 dev_set_drvdata(new_smi->io.dev, NULL);
2239 2241
2240out_err_stop_timer: 2242out_err_stop_timer:
2241 wait_for_timer_and_thread(new_smi); 2243 stop_timer_and_thread(new_smi);
2242 2244
2243out_err: 2245out_err:
2244 new_smi->interrupt_disabled = true; 2246 new_smi->interrupt_disabled = true;
@@ -2388,7 +2390,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
2388 */ 2390 */
2389 if (to_clean->io.irq_cleanup) 2391 if (to_clean->io.irq_cleanup)
2390 to_clean->io.irq_cleanup(&to_clean->io); 2392 to_clean->io.irq_cleanup(&to_clean->io);
2391 wait_for_timer_and_thread(to_clean); 2393 stop_timer_and_thread(to_clean);
2392 2394
2393 /* 2395 /*
2394 * Timeouts are stopped, now make sure the interrupts are off 2396 * Timeouts are stopped, now make sure the interrupts are off
@@ -2400,7 +2402,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
2400 schedule_timeout_uninterruptible(1); 2402 schedule_timeout_uninterruptible(1);
2401 } 2403 }
2402 if (to_clean->handlers) 2404 if (to_clean->handlers)
2403 disable_si_irq(to_clean, false); 2405 disable_si_irq(to_clean);
2404 while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) { 2406 while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
2405 poll(to_clean); 2407 poll(to_clean);
2406 schedule_timeout_uninterruptible(1); 2408 schedule_timeout_uninterruptible(1);
diff --git a/drivers/char/ipmi/ipmi_si_parisc.c b/drivers/char/ipmi/ipmi_si_parisc.c
index 090b073ab441..6b10f0e18a95 100644
--- a/drivers/char/ipmi/ipmi_si_parisc.c
+++ b/drivers/char/ipmi/ipmi_si_parisc.c
@@ -10,6 +10,8 @@ static int __init ipmi_parisc_probe(struct parisc_device *dev)
10{ 10{
11 struct si_sm_io io; 11 struct si_sm_io io;
12 12
13 memset(&io, 0, sizeof(io));
14
13 io.si_type = SI_KCS; 15 io.si_type = SI_KCS;
14 io.addr_source = SI_DEVICETREE; 16 io.addr_source = SI_DEVICETREE;
15 io.addr_type = IPMI_MEM_ADDR_SPACE; 17 io.addr_type = IPMI_MEM_ADDR_SPACE;
diff --git a/drivers/char/ipmi/ipmi_si_pci.c b/drivers/char/ipmi/ipmi_si_pci.c
index 99771f5cad07..27dd11c49d21 100644
--- a/drivers/char/ipmi/ipmi_si_pci.c
+++ b/drivers/char/ipmi/ipmi_si_pci.c
@@ -103,10 +103,13 @@ static int ipmi_pci_probe(struct pci_dev *pdev,
103 io.addr_source_cleanup = ipmi_pci_cleanup; 103 io.addr_source_cleanup = ipmi_pci_cleanup;
104 io.addr_source_data = pdev; 104 io.addr_source_data = pdev;
105 105
106 if (pci_resource_flags(pdev, 0) & IORESOURCE_IO) 106 if (pci_resource_flags(pdev, 0) & IORESOURCE_IO) {
107 io.addr_type = IPMI_IO_ADDR_SPACE; 107 io.addr_type = IPMI_IO_ADDR_SPACE;
108 else 108 io.io_setup = ipmi_si_port_setup;
109 } else {
109 io.addr_type = IPMI_MEM_ADDR_SPACE; 110 io.addr_type = IPMI_MEM_ADDR_SPACE;
111 io.io_setup = ipmi_si_mem_setup;
112 }
110 io.addr_data = pci_resource_start(pdev, 0); 113 io.addr_data = pci_resource_start(pdev, 0);
111 114
112 io.regspacing = ipmi_pci_probe_regspacing(&io); 115 io.regspacing = ipmi_pci_probe_regspacing(&io);
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 647d056df88c..b56c11f51baf 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -220,7 +220,8 @@ static bool clk_core_is_enabled(struct clk_core *core)
220 220
221 ret = core->ops->is_enabled(core->hw); 221 ret = core->ops->is_enabled(core->hw);
222done: 222done:
223 clk_pm_runtime_put(core); 223 if (core->dev)
224 pm_runtime_put(core->dev);
224 225
225 return ret; 226 return ret;
226} 227}
@@ -1564,6 +1565,9 @@ static void clk_change_rate(struct clk_core *core)
1564 best_parent_rate = core->parent->rate; 1565 best_parent_rate = core->parent->rate;
1565 } 1566 }
1566 1567
1568 if (clk_pm_runtime_get(core))
1569 return;
1570
1567 if (core->flags & CLK_SET_RATE_UNGATE) { 1571 if (core->flags & CLK_SET_RATE_UNGATE) {
1568 unsigned long flags; 1572 unsigned long flags;
1569 1573
@@ -1634,6 +1638,8 @@ static void clk_change_rate(struct clk_core *core)
1634 /* handle the new child who might not be in core->children yet */ 1638 /* handle the new child who might not be in core->children yet */
1635 if (core->new_child) 1639 if (core->new_child)
1636 clk_change_rate(core->new_child); 1640 clk_change_rate(core->new_child);
1641
1642 clk_pm_runtime_put(core);
1637} 1643}
1638 1644
1639static int clk_core_set_rate_nolock(struct clk_core *core, 1645static int clk_core_set_rate_nolock(struct clk_core *core,
diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c
index a1a634253d6f..f00d8758ba24 100644
--- a/drivers/clk/sunxi/clk-sun9i-mmc.c
+++ b/drivers/clk/sunxi/clk-sun9i-mmc.c
@@ -16,6 +16,7 @@
16 16
17#include <linux/clk.h> 17#include <linux/clk.h>
18#include <linux/clk-provider.h> 18#include <linux/clk-provider.h>
19#include <linux/delay.h>
19#include <linux/init.h> 20#include <linux/init.h>
20#include <linux/of.h> 21#include <linux/of.h>
21#include <linux/of_device.h> 22#include <linux/of_device.h>
@@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev,
83 return 0; 84 return 0;
84} 85}
85 86
87static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev,
88 unsigned long id)
89{
90 sun9i_mmc_reset_assert(rcdev, id);
91 udelay(10);
92 sun9i_mmc_reset_deassert(rcdev, id);
93
94 return 0;
95}
96
86static const struct reset_control_ops sun9i_mmc_reset_ops = { 97static const struct reset_control_ops sun9i_mmc_reset_ops = {
87 .assert = sun9i_mmc_reset_assert, 98 .assert = sun9i_mmc_reset_assert,
88 .deassert = sun9i_mmc_reset_deassert, 99 .deassert = sun9i_mmc_reset_deassert,
100 .reset = sun9i_mmc_reset_reset,
89}; 101};
90 102
91static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev) 103static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 58d4f4e1ad6a..ca38229b045a 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -22,6 +22,8 @@
22 22
23#include "cpufreq_governor.h" 23#include "cpufreq_governor.h"
24 24
25#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL (2 * TICK_NSEC / NSEC_PER_USEC)
26
25static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); 27static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs);
26 28
27static DEFINE_MUTEX(gov_dbs_data_mutex); 29static DEFINE_MUTEX(gov_dbs_data_mutex);
@@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
47{ 49{
48 struct dbs_data *dbs_data = to_dbs_data(attr_set); 50 struct dbs_data *dbs_data = to_dbs_data(attr_set);
49 struct policy_dbs_info *policy_dbs; 51 struct policy_dbs_info *policy_dbs;
52 unsigned int sampling_interval;
50 int ret; 53 int ret;
51 ret = sscanf(buf, "%u", &dbs_data->sampling_rate); 54
52 if (ret != 1) 55 ret = sscanf(buf, "%u", &sampling_interval);
56 if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL)
53 return -EINVAL; 57 return -EINVAL;
54 58
59 dbs_data->sampling_rate = sampling_interval;
60
55 /* 61 /*
56 * We are operating under dbs_data->mutex and so the list and its 62 * We are operating under dbs_data->mutex and so the list and its
57 * entries can't be freed concurrently. 63 * entries can't be freed concurrently.
@@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
430 if (ret) 436 if (ret)
431 goto free_policy_dbs_info; 437 goto free_policy_dbs_info;
432 438
433 dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy); 439 /*
440 * The sampling interval should not be less than the transition latency
441 * of the CPU and it also cannot be too small for dbs_update() to work
442 * correctly.
443 */
444 dbs_data->sampling_rate = max_t(unsigned int,
445 CPUFREQ_DBS_MIN_SAMPLING_INTERVAL,
446 cpufreq_policy_transition_delay_us(policy));
434 447
435 if (!have_governor_per_policy()) 448 if (!have_governor_per_policy())
436 gov->gdbs_data = dbs_data; 449 gov->gdbs_data = dbs_data;
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 628fe899cb48..d9b2c2de49c4 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev)
226 val >>= OCOTP_CFG3_SPEED_SHIFT; 226 val >>= OCOTP_CFG3_SPEED_SHIFT;
227 val &= 0x3; 227 val &= 0x3;
228 228
229 if ((val != OCOTP_CFG3_SPEED_1P2GHZ) &&
230 of_machine_is_compatible("fsl,imx6q"))
231 if (dev_pm_opp_disable(dev, 1200000000))
232 dev_warn(dev, "failed to disable 1.2GHz OPP\n");
233 if (val < OCOTP_CFG3_SPEED_996MHZ) 229 if (val < OCOTP_CFG3_SPEED_996MHZ)
234 if (dev_pm_opp_disable(dev, 996000000)) 230 if (dev_pm_opp_disable(dev, 996000000))
235 dev_warn(dev, "failed to disable 996MHz OPP\n"); 231 dev_warn(dev, "failed to disable 996MHz OPP\n");
236 if (of_machine_is_compatible("fsl,imx6q")) { 232
233 if (of_machine_is_compatible("fsl,imx6q") ||
234 of_machine_is_compatible("fsl,imx6qp")) {
237 if (val != OCOTP_CFG3_SPEED_852MHZ) 235 if (val != OCOTP_CFG3_SPEED_852MHZ)
238 if (dev_pm_opp_disable(dev, 852000000)) 236 if (dev_pm_opp_disable(dev, 852000000))
239 dev_warn(dev, "failed to disable 852MHz OPP\n"); 237 dev_warn(dev, "failed to disable 852MHz OPP\n");
238 if (val != OCOTP_CFG3_SPEED_1P2GHZ)
239 if (dev_pm_opp_disable(dev, 1200000000))
240 dev_warn(dev, "failed to disable 1.2GHz OPP\n");
240 } 241 }
241 iounmap(base); 242 iounmap(base);
242put_node: 243put_node:
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index fbab271b3bf9..a861b5b4d443 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -708,7 +708,7 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
708 unsigned long flags) 708 unsigned long flags)
709{ 709{
710 struct at_dma_chan *atchan = to_at_dma_chan(chan); 710 struct at_dma_chan *atchan = to_at_dma_chan(chan);
711 struct data_chunk *first = xt->sgl; 711 struct data_chunk *first;
712 struct at_desc *desc = NULL; 712 struct at_desc *desc = NULL;
713 size_t xfer_count; 713 size_t xfer_count;
714 unsigned int dwidth; 714 unsigned int dwidth;
@@ -720,6 +720,8 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
720 if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) 720 if (unlikely(!xt || xt->numf != 1 || !xt->frame_size))
721 return NULL; 721 return NULL;
722 722
723 first = xt->sgl;
724
723 dev_info(chan2dev(chan), 725 dev_info(chan2dev(chan),
724 "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n", 726 "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n",
725 __func__, &xt->src_start, &xt->dst_start, xt->numf, 727 __func__, &xt->src_start, &xt->dst_start, xt->numf,
diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c
index d50273fed715..afd5e10f8927 100644
--- a/drivers/dma/dma-jz4740.c
+++ b/drivers/dma/dma-jz4740.c
@@ -555,7 +555,7 @@ static int jz4740_dma_probe(struct platform_device *pdev)
555 555
556 ret = dma_async_device_register(dd); 556 ret = dma_async_device_register(dd);
557 if (ret) 557 if (ret)
558 return ret; 558 goto err_clk;
559 559
560 irq = platform_get_irq(pdev, 0); 560 irq = platform_get_irq(pdev, 0);
561 ret = request_irq(irq, jz4740_dma_irq, 0, dev_name(&pdev->dev), dmadev); 561 ret = request_irq(irq, jz4740_dma_irq, 0, dev_name(&pdev->dev), dmadev);
@@ -568,6 +568,8 @@ static int jz4740_dma_probe(struct platform_device *pdev)
568 568
569err_unregister: 569err_unregister:
570 dma_async_device_unregister(dd); 570 dma_async_device_unregister(dd);
571err_clk:
572 clk_disable_unprepare(dmadev->clk);
571 return ret; 573 return ret;
572} 574}
573 575
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 47edc7fbf91f..ec5f9d2bc820 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -155,6 +155,12 @@ MODULE_PARM_DESC(run, "Run the test (default: false)");
155#define PATTERN_COUNT_MASK 0x1f 155#define PATTERN_COUNT_MASK 0x1f
156#define PATTERN_MEMSET_IDX 0x01 156#define PATTERN_MEMSET_IDX 0x01
157 157
158/* poor man's completion - we want to use wait_event_freezable() on it */
159struct dmatest_done {
160 bool done;
161 wait_queue_head_t *wait;
162};
163
158struct dmatest_thread { 164struct dmatest_thread {
159 struct list_head node; 165 struct list_head node;
160 struct dmatest_info *info; 166 struct dmatest_info *info;
@@ -165,6 +171,8 @@ struct dmatest_thread {
165 u8 **dsts; 171 u8 **dsts;
166 u8 **udsts; 172 u8 **udsts;
167 enum dma_transaction_type type; 173 enum dma_transaction_type type;
174 wait_queue_head_t done_wait;
175 struct dmatest_done test_done;
168 bool done; 176 bool done;
169}; 177};
170 178
@@ -342,18 +350,25 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
342 return error_count; 350 return error_count;
343} 351}
344 352
345/* poor man's completion - we want to use wait_event_freezable() on it */
346struct dmatest_done {
347 bool done;
348 wait_queue_head_t *wait;
349};
350 353
351static void dmatest_callback(void *arg) 354static void dmatest_callback(void *arg)
352{ 355{
353 struct dmatest_done *done = arg; 356 struct dmatest_done *done = arg;
354 357 struct dmatest_thread *thread =
355 done->done = true; 358 container_of(arg, struct dmatest_thread, done_wait);
356 wake_up_all(done->wait); 359 if (!thread->done) {
360 done->done = true;
361 wake_up_all(done->wait);
362 } else {
363 /*
364 * If thread->done, it means that this callback occurred
365 * after the parent thread has cleaned up. This can
366 * happen in the case that driver doesn't implement
367 * the terminate_all() functionality and a dma operation
368 * did not occur within the timeout period
369 */
370 WARN(1, "dmatest: Kernel memory may be corrupted!!\n");
371 }
357} 372}
358 373
359static unsigned int min_odd(unsigned int x, unsigned int y) 374static unsigned int min_odd(unsigned int x, unsigned int y)
@@ -424,9 +439,8 @@ static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
424 */ 439 */
425static int dmatest_func(void *data) 440static int dmatest_func(void *data)
426{ 441{
427 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait);
428 struct dmatest_thread *thread = data; 442 struct dmatest_thread *thread = data;
429 struct dmatest_done done = { .wait = &done_wait }; 443 struct dmatest_done *done = &thread->test_done;
430 struct dmatest_info *info; 444 struct dmatest_info *info;
431 struct dmatest_params *params; 445 struct dmatest_params *params;
432 struct dma_chan *chan; 446 struct dma_chan *chan;
@@ -673,9 +687,9 @@ static int dmatest_func(void *data)
673 continue; 687 continue;
674 } 688 }
675 689
676 done.done = false; 690 done->done = false;
677 tx->callback = dmatest_callback; 691 tx->callback = dmatest_callback;
678 tx->callback_param = &done; 692 tx->callback_param = done;
679 cookie = tx->tx_submit(tx); 693 cookie = tx->tx_submit(tx);
680 694
681 if (dma_submit_error(cookie)) { 695 if (dma_submit_error(cookie)) {
@@ -688,21 +702,12 @@ static int dmatest_func(void *data)
688 } 702 }
689 dma_async_issue_pending(chan); 703 dma_async_issue_pending(chan);
690 704
691 wait_event_freezable_timeout(done_wait, done.done, 705 wait_event_freezable_timeout(thread->done_wait, done->done,
692 msecs_to_jiffies(params->timeout)); 706 msecs_to_jiffies(params->timeout));
693 707
694 status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); 708 status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
695 709
696 if (!done.done) { 710 if (!done->done) {
697 /*
698 * We're leaving the timed out dma operation with
699 * dangling pointer to done_wait. To make this
700 * correct, we'll need to allocate wait_done for
701 * each test iteration and perform "who's gonna
702 * free it this time?" dancing. For now, just
703 * leave it dangling.
704 */
705 WARN(1, "dmatest: Kernel stack may be corrupted!!\n");
706 dmaengine_unmap_put(um); 711 dmaengine_unmap_put(um);
707 result("test timed out", total_tests, src_off, dst_off, 712 result("test timed out", total_tests, src_off, dst_off,
708 len, 0); 713 len, 0);
@@ -789,7 +794,7 @@ err_thread_type:
789 dmatest_KBs(runtime, total_len), ret); 794 dmatest_KBs(runtime, total_len), ret);
790 795
791 /* terminate all transfers on specified channels */ 796 /* terminate all transfers on specified channels */
792 if (ret) 797 if (ret || failed_tests)
793 dmaengine_terminate_all(chan); 798 dmaengine_terminate_all(chan);
794 799
795 thread->done = true; 800 thread->done = true;
@@ -849,6 +854,8 @@ static int dmatest_add_threads(struct dmatest_info *info,
849 thread->info = info; 854 thread->info = info;
850 thread->chan = dtc->chan; 855 thread->chan = dtc->chan;
851 thread->type = type; 856 thread->type = type;
857 thread->test_done.wait = &thread->done_wait;
858 init_waitqueue_head(&thread->done_wait);
852 smp_wmb(); 859 smp_wmb();
853 thread->task = kthread_create(dmatest_func, thread, "%s-%s%u", 860 thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
854 dma_chan_name(chan), op, i); 861 dma_chan_name(chan), op, i);
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c
index 6775f2c74e25..c7568869284e 100644
--- a/drivers/dma/fsl-edma.c
+++ b/drivers/dma/fsl-edma.c
@@ -863,11 +863,11 @@ static void fsl_edma_irq_exit(
863 } 863 }
864} 864}
865 865
866static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma) 866static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma, int nr_clocks)
867{ 867{
868 int i; 868 int i;
869 869
870 for (i = 0; i < DMAMUX_NR; i++) 870 for (i = 0; i < nr_clocks; i++)
871 clk_disable_unprepare(fsl_edma->muxclk[i]); 871 clk_disable_unprepare(fsl_edma->muxclk[i]);
872} 872}
873 873
@@ -904,25 +904,25 @@ static int fsl_edma_probe(struct platform_device *pdev)
904 904
905 res = platform_get_resource(pdev, IORESOURCE_MEM, 1 + i); 905 res = platform_get_resource(pdev, IORESOURCE_MEM, 1 + i);
906 fsl_edma->muxbase[i] = devm_ioremap_resource(&pdev->dev, res); 906 fsl_edma->muxbase[i] = devm_ioremap_resource(&pdev->dev, res);
907 if (IS_ERR(fsl_edma->muxbase[i])) 907 if (IS_ERR(fsl_edma->muxbase[i])) {
908 /* on error: disable all previously enabled clks */
909 fsl_disable_clocks(fsl_edma, i);
908 return PTR_ERR(fsl_edma->muxbase[i]); 910 return PTR_ERR(fsl_edma->muxbase[i]);
911 }
909 912
910 sprintf(clkname, "dmamux%d", i); 913 sprintf(clkname, "dmamux%d", i);
911 fsl_edma->muxclk[i] = devm_clk_get(&pdev->dev, clkname); 914 fsl_edma->muxclk[i] = devm_clk_get(&pdev->dev, clkname);
912 if (IS_ERR(fsl_edma->muxclk[i])) { 915 if (IS_ERR(fsl_edma->muxclk[i])) {
913 dev_err(&pdev->dev, "Missing DMAMUX block clock.\n"); 916 dev_err(&pdev->dev, "Missing DMAMUX block clock.\n");
917 /* on error: disable all previously enabled clks */
918 fsl_disable_clocks(fsl_edma, i);
914 return PTR_ERR(fsl_edma->muxclk[i]); 919 return PTR_ERR(fsl_edma->muxclk[i]);
915 } 920 }
916 921
917 ret = clk_prepare_enable(fsl_edma->muxclk[i]); 922 ret = clk_prepare_enable(fsl_edma->muxclk[i]);
918 if (ret) { 923 if (ret)
919 /* disable only clks which were enabled on error */ 924 /* on error: disable all previously enabled clks */
920 for (; i >= 0; i--) 925 fsl_disable_clocks(fsl_edma, i);
921 clk_disable_unprepare(fsl_edma->muxclk[i]);
922
923 dev_err(&pdev->dev, "DMAMUX clk block failed.\n");
924 return ret;
925 }
926 926
927 } 927 }
928 928
@@ -976,7 +976,7 @@ static int fsl_edma_probe(struct platform_device *pdev)
976 if (ret) { 976 if (ret) {
977 dev_err(&pdev->dev, 977 dev_err(&pdev->dev,
978 "Can't register Freescale eDMA engine. (%d)\n", ret); 978 "Can't register Freescale eDMA engine. (%d)\n", ret);
979 fsl_disable_clocks(fsl_edma); 979 fsl_disable_clocks(fsl_edma, DMAMUX_NR);
980 return ret; 980 return ret;
981 } 981 }
982 982
@@ -985,7 +985,7 @@ static int fsl_edma_probe(struct platform_device *pdev)
985 dev_err(&pdev->dev, 985 dev_err(&pdev->dev,
986 "Can't register Freescale eDMA of_dma. (%d)\n", ret); 986 "Can't register Freescale eDMA of_dma. (%d)\n", ret);
987 dma_async_device_unregister(&fsl_edma->dma_dev); 987 dma_async_device_unregister(&fsl_edma->dma_dev);
988 fsl_disable_clocks(fsl_edma); 988 fsl_disable_clocks(fsl_edma, DMAMUX_NR);
989 return ret; 989 return ret;
990 } 990 }
991 991
@@ -1015,7 +1015,7 @@ static int fsl_edma_remove(struct platform_device *pdev)
1015 fsl_edma_cleanup_vchan(&fsl_edma->dma_dev); 1015 fsl_edma_cleanup_vchan(&fsl_edma->dma_dev);
1016 of_dma_controller_free(np); 1016 of_dma_controller_free(np);
1017 dma_async_device_unregister(&fsl_edma->dma_dev); 1017 dma_async_device_unregister(&fsl_edma->dma_dev);
1018 fsl_disable_clocks(fsl_edma); 1018 fsl_disable_clocks(fsl_edma, DMAMUX_NR);
1019 1019
1020 return 0; 1020 return 0;
1021} 1021}
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 2f31d3d0caa6..7792a9186f9c 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -390,7 +390,7 @@ static int ioat_dma_self_test(struct ioatdma_device *ioat_dma)
390 if (memcmp(src, dest, IOAT_TEST_SIZE)) { 390 if (memcmp(src, dest, IOAT_TEST_SIZE)) {
391 dev_err(dev, "Self-test copy failed compare, disabling\n"); 391 dev_err(dev, "Self-test copy failed compare, disabling\n");
392 err = -ENODEV; 392 err = -ENODEV;
393 goto free_resources; 393 goto unmap_dma;
394 } 394 }
395 395
396unmap_dma: 396unmap_dma:
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index dfcf56ee3c61..76861a00bb92 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -522,6 +522,7 @@ static struct of_device_id const bcm_kona_gpio_of_match[] = {
522 * category than their parents, so it won't report false recursion. 522 * category than their parents, so it won't report false recursion.
523 */ 523 */
524static struct lock_class_key gpio_lock_class; 524static struct lock_class_key gpio_lock_class;
525static struct lock_class_key gpio_request_class;
525 526
526static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq, 527static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq,
527 irq_hw_number_t hwirq) 528 irq_hw_number_t hwirq)
@@ -531,7 +532,7 @@ static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq,
531 ret = irq_set_chip_data(irq, d->host_data); 532 ret = irq_set_chip_data(irq, d->host_data);
532 if (ret < 0) 533 if (ret < 0)
533 return ret; 534 return ret;
534 irq_set_lockdep_class(irq, &gpio_lock_class); 535 irq_set_lockdep_class(irq, &gpio_lock_class, &gpio_request_class);
535 irq_set_chip_and_handler(irq, &bcm_gpio_irq_chip, handle_simple_irq); 536 irq_set_chip_and_handler(irq, &bcm_gpio_irq_chip, handle_simple_irq);
536 irq_set_noprobe(irq); 537 irq_set_noprobe(irq);
537 538
diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c
index 545d43a587b7..bb4f8cf18bd9 100644
--- a/drivers/gpio/gpio-brcmstb.c
+++ b/drivers/gpio/gpio-brcmstb.c
@@ -327,6 +327,7 @@ static struct brcmstb_gpio_bank *brcmstb_gpio_hwirq_to_bank(
327 * category than their parents, so it won't report false recursion. 327 * category than their parents, so it won't report false recursion.
328 */ 328 */
329static struct lock_class_key brcmstb_gpio_irq_lock_class; 329static struct lock_class_key brcmstb_gpio_irq_lock_class;
330static struct lock_class_key brcmstb_gpio_irq_request_class;
330 331
331 332
332static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, 333static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq,
@@ -346,7 +347,8 @@ static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq,
346 ret = irq_set_chip_data(irq, &bank->gc); 347 ret = irq_set_chip_data(irq, &bank->gc);
347 if (ret < 0) 348 if (ret < 0)
348 return ret; 349 return ret;
349 irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class); 350 irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class,
351 &brcmstb_gpio_irq_request_class);
350 irq_set_chip_and_handler(irq, &priv->irq_chip, handle_level_irq); 352 irq_set_chip_and_handler(irq, &priv->irq_chip, handle_level_irq);
351 irq_set_noprobe(irq); 353 irq_set_noprobe(irq);
352 return 0; 354 return 0;
diff --git a/drivers/gpio/gpio-reg.c b/drivers/gpio/gpio-reg.c
index 23e771dba4c1..e85903eddc68 100644
--- a/drivers/gpio/gpio-reg.c
+++ b/drivers/gpio/gpio-reg.c
@@ -103,8 +103,8 @@ static int gpio_reg_to_irq(struct gpio_chip *gc, unsigned offset)
103 struct gpio_reg *r = to_gpio_reg(gc); 103 struct gpio_reg *r = to_gpio_reg(gc);
104 int irq = r->irqs[offset]; 104 int irq = r->irqs[offset];
105 105
106 if (irq >= 0 && r->irq.domain) 106 if (irq >= 0 && r->irqdomain)
107 irq = irq_find_mapping(r->irq.domain, irq); 107 irq = irq_find_mapping(r->irqdomain, irq);
108 108
109 return irq; 109 return irq;
110} 110}
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index 8db47f671708..02fa8fe2292a 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -565,6 +565,7 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = {
565 * than their parents, so it won't report false recursion. 565 * than their parents, so it won't report false recursion.
566 */ 566 */
567static struct lock_class_key gpio_lock_class; 567static struct lock_class_key gpio_lock_class;
568static struct lock_class_key gpio_request_class;
568 569
569static int tegra_gpio_probe(struct platform_device *pdev) 570static int tegra_gpio_probe(struct platform_device *pdev)
570{ 571{
@@ -670,7 +671,8 @@ static int tegra_gpio_probe(struct platform_device *pdev)
670 671
671 bank = &tgi->bank_info[GPIO_BANK(gpio)]; 672 bank = &tgi->bank_info[GPIO_BANK(gpio)];
672 673
673 irq_set_lockdep_class(irq, &gpio_lock_class); 674 irq_set_lockdep_class(irq, &gpio_lock_class,
675 &gpio_request_class);
674 irq_set_chip_data(irq, bank); 676 irq_set_chip_data(irq, bank);
675 irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq); 677 irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq);
676 } 678 }
diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c
index 2313af82fad3..acd59113e08b 100644
--- a/drivers/gpio/gpio-xgene-sb.c
+++ b/drivers/gpio/gpio-xgene-sb.c
@@ -139,7 +139,7 @@ static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio)
139 139
140static int xgene_gpio_sb_domain_activate(struct irq_domain *d, 140static int xgene_gpio_sb_domain_activate(struct irq_domain *d,
141 struct irq_data *irq_data, 141 struct irq_data *irq_data,
142 bool early) 142 bool reserve)
143{ 143{
144 struct xgene_gpio_sb *priv = d->host_data; 144 struct xgene_gpio_sb *priv = d->host_data;
145 u32 gpio = HWIRQ_TO_GPIO(priv, irq_data->hwirq); 145 u32 gpio = HWIRQ_TO_GPIO(priv, irq_data->hwirq);
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index eb4528c87c0b..d6f3d9ee1350 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip)
1074 } 1074 }
1075 1075
1076 if (!chip->names) 1076 if (!chip->names)
1077 devprop_gpiochip_set_names(chip); 1077 devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent));
1078 1078
1079 acpi_gpiochip_request_regions(acpi_gpio); 1079 acpi_gpiochip_request_regions(acpi_gpio);
1080 acpi_gpiochip_scan_gpios(acpi_gpio); 1080 acpi_gpiochip_scan_gpios(acpi_gpio);
diff --git a/drivers/gpio/gpiolib-devprop.c b/drivers/gpio/gpiolib-devprop.c
index 27f383bda7d9..f748aa3e77f7 100644
--- a/drivers/gpio/gpiolib-devprop.c
+++ b/drivers/gpio/gpiolib-devprop.c
@@ -19,30 +19,27 @@
19/** 19/**
20 * devprop_gpiochip_set_names - Set GPIO line names using device properties 20 * devprop_gpiochip_set_names - Set GPIO line names using device properties
21 * @chip: GPIO chip whose lines should be named, if possible 21 * @chip: GPIO chip whose lines should be named, if possible
22 * @fwnode: Property Node containing the gpio-line-names property
22 * 23 *
23 * Looks for device property "gpio-line-names" and if it exists assigns 24 * Looks for device property "gpio-line-names" and if it exists assigns
24 * GPIO line names for the chip. The memory allocated for the assigned 25 * GPIO line names for the chip. The memory allocated for the assigned
25 * names belong to the underlying firmware node and should not be released 26 * names belong to the underlying firmware node and should not be released
26 * by the caller. 27 * by the caller.
27 */ 28 */
28void devprop_gpiochip_set_names(struct gpio_chip *chip) 29void devprop_gpiochip_set_names(struct gpio_chip *chip,
30 const struct fwnode_handle *fwnode)
29{ 31{
30 struct gpio_device *gdev = chip->gpiodev; 32 struct gpio_device *gdev = chip->gpiodev;
31 const char **names; 33 const char **names;
32 int ret, i; 34 int ret, i;
33 35
34 if (!chip->parent) { 36 ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
35 dev_warn(&gdev->dev, "GPIO chip parent is NULL\n");
36 return;
37 }
38
39 ret = device_property_read_string_array(chip->parent, "gpio-line-names",
40 NULL, 0); 37 NULL, 0);
41 if (ret < 0) 38 if (ret < 0)
42 return; 39 return;
43 40
44 if (ret != gdev->ngpio) { 41 if (ret != gdev->ngpio) {
45 dev_warn(chip->parent, 42 dev_warn(&gdev->dev,
46 "names %d do not match number of GPIOs %d\n", ret, 43 "names %d do not match number of GPIOs %d\n", ret,
47 gdev->ngpio); 44 gdev->ngpio);
48 return; 45 return;
@@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip)
52 if (!names) 49 if (!names)
53 return; 50 return;
54 51
55 ret = device_property_read_string_array(chip->parent, "gpio-line-names", 52 ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
56 names, gdev->ngpio); 53 names, gdev->ngpio);
57 if (ret < 0) { 54 if (ret < 0) {
58 dev_warn(chip->parent, "failed to read GPIO line names\n"); 55 dev_warn(&gdev->dev, "failed to read GPIO line names\n");
59 kfree(names); 56 kfree(names);
60 return; 57 return;
61 } 58 }
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index e0d59e61b52f..72a0695d2ac3 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip)
493 493
494 /* If the chip defines names itself, these take precedence */ 494 /* If the chip defines names itself, these take precedence */
495 if (!chip->names) 495 if (!chip->names)
496 devprop_gpiochip_set_names(chip); 496 devprop_gpiochip_set_names(chip,
497 of_fwnode_handle(chip->of_node));
497 498
498 of_node_get(chip->of_node); 499 of_node_get(chip->of_node);
499 500
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index aad84a6306c4..44332b793718 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -73,7 +73,8 @@ LIST_HEAD(gpio_devices);
73 73
74static void gpiochip_free_hogs(struct gpio_chip *chip); 74static void gpiochip_free_hogs(struct gpio_chip *chip);
75static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, 75static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
76 struct lock_class_key *key); 76 struct lock_class_key *lock_key,
77 struct lock_class_key *request_key);
77static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); 78static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
78static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip); 79static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip);
79static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip); 80static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip);
@@ -1100,7 +1101,8 @@ static void gpiochip_setup_devs(void)
1100} 1101}
1101 1102
1102int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, 1103int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
1103 struct lock_class_key *key) 1104 struct lock_class_key *lock_key,
1105 struct lock_class_key *request_key)
1104{ 1106{
1105 unsigned long flags; 1107 unsigned long flags;
1106 int status = 0; 1108 int status = 0;
@@ -1246,7 +1248,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
1246 if (status) 1248 if (status)
1247 goto err_remove_from_list; 1249 goto err_remove_from_list;
1248 1250
1249 status = gpiochip_add_irqchip(chip, key); 1251 status = gpiochip_add_irqchip(chip, lock_key, request_key);
1250 if (status) 1252 if (status)
1251 goto err_remove_chip; 1253 goto err_remove_chip;
1252 1254
@@ -1632,7 +1634,7 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq,
1632 * This lock class tells lockdep that GPIO irqs are in a different 1634 * This lock class tells lockdep that GPIO irqs are in a different
1633 * category than their parents, so it won't report false recursion. 1635 * category than their parents, so it won't report false recursion.
1634 */ 1636 */
1635 irq_set_lockdep_class(irq, chip->irq.lock_key); 1637 irq_set_lockdep_class(irq, chip->irq.lock_key, chip->irq.request_key);
1636 irq_set_chip_and_handler(irq, chip->irq.chip, chip->irq.handler); 1638 irq_set_chip_and_handler(irq, chip->irq.chip, chip->irq.handler);
1637 /* Chips that use nested thread handlers have them marked */ 1639 /* Chips that use nested thread handlers have them marked */
1638 if (chip->irq.threaded) 1640 if (chip->irq.threaded)
@@ -1712,10 +1714,12 @@ static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset)
1712/** 1714/**
1713 * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip 1715 * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip
1714 * @gpiochip: the GPIO chip to add the IRQ chip to 1716 * @gpiochip: the GPIO chip to add the IRQ chip to
1715 * @lock_key: lockdep class 1717 * @lock_key: lockdep class for IRQ lock
1718 * @request_key: lockdep class for IRQ request
1716 */ 1719 */
1717static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, 1720static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
1718 struct lock_class_key *lock_key) 1721 struct lock_class_key *lock_key,
1722 struct lock_class_key *request_key)
1719{ 1723{
1720 struct irq_chip *irqchip = gpiochip->irq.chip; 1724 struct irq_chip *irqchip = gpiochip->irq.chip;
1721 const struct irq_domain_ops *ops; 1725 const struct irq_domain_ops *ops;
@@ -1753,6 +1757,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
1753 gpiochip->to_irq = gpiochip_to_irq; 1757 gpiochip->to_irq = gpiochip_to_irq;
1754 gpiochip->irq.default_type = type; 1758 gpiochip->irq.default_type = type;
1755 gpiochip->irq.lock_key = lock_key; 1759 gpiochip->irq.lock_key = lock_key;
1760 gpiochip->irq.request_key = request_key;
1756 1761
1757 if (gpiochip->irq.domain_ops) 1762 if (gpiochip->irq.domain_ops)
1758 ops = gpiochip->irq.domain_ops; 1763 ops = gpiochip->irq.domain_ops;
@@ -1850,7 +1855,8 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip)
1850 * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE 1855 * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE
1851 * to have the core avoid setting up any default type in the hardware. 1856 * to have the core avoid setting up any default type in the hardware.
1852 * @threaded: whether this irqchip uses a nested thread handler 1857 * @threaded: whether this irqchip uses a nested thread handler
1853 * @lock_key: lockdep class 1858 * @lock_key: lockdep class for IRQ lock
1859 * @request_key: lockdep class for IRQ request
1854 * 1860 *
1855 * This function closely associates a certain irqchip with a certain 1861 * This function closely associates a certain irqchip with a certain
1856 * gpiochip, providing an irq domain to translate the local IRQs to 1862 * gpiochip, providing an irq domain to translate the local IRQs to
@@ -1872,7 +1878,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
1872 irq_flow_handler_t handler, 1878 irq_flow_handler_t handler,
1873 unsigned int type, 1879 unsigned int type,
1874 bool threaded, 1880 bool threaded,
1875 struct lock_class_key *lock_key) 1881 struct lock_class_key *lock_key,
1882 struct lock_class_key *request_key)
1876{ 1883{
1877 struct device_node *of_node; 1884 struct device_node *of_node;
1878 1885
@@ -1913,6 +1920,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
1913 gpiochip->irq.default_type = type; 1920 gpiochip->irq.default_type = type;
1914 gpiochip->to_irq = gpiochip_to_irq; 1921 gpiochip->to_irq = gpiochip_to_irq;
1915 gpiochip->irq.lock_key = lock_key; 1922 gpiochip->irq.lock_key = lock_key;
1923 gpiochip->irq.request_key = request_key;
1916 gpiochip->irq.domain = irq_domain_add_simple(of_node, 1924 gpiochip->irq.domain = irq_domain_add_simple(of_node,
1917 gpiochip->ngpio, first_irq, 1925 gpiochip->ngpio, first_irq,
1918 &gpiochip_domain_ops, gpiochip); 1926 &gpiochip_domain_ops, gpiochip);
@@ -1940,7 +1948,8 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key);
1940#else /* CONFIG_GPIOLIB_IRQCHIP */ 1948#else /* CONFIG_GPIOLIB_IRQCHIP */
1941 1949
1942static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip, 1950static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
1943 struct lock_class_key *key) 1951 struct lock_class_key *lock_key,
1952 struct lock_class_key *request_key)
1944{ 1953{
1945 return 0; 1954 return 0;
1946} 1955}
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index af48322839c3..6c44d1652139 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -228,7 +228,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc)
228 return desc - &desc->gdev->descs[0]; 228 return desc - &desc->gdev->descs[0];
229} 229}
230 230
231void devprop_gpiochip_set_names(struct gpio_chip *chip); 231void devprop_gpiochip_set_names(struct gpio_chip *chip,
232 const struct fwnode_handle *fwnode);
232 233
233/* With descriptor prefix */ 234/* With descriptor prefix */
234 235
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index da43813d67a4..5aeb5f8816f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2467,7 +2467,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2467 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 2467 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2468 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 2468 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2469 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 2469 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2470 PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */ 2470 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2471 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 2471 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2472 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 2472 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2473 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 2473 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f71fe6d2ddda..bb5fa895fb64 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2336,7 +2336,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
2336 const struct dm_connector_state *dm_state) 2336 const struct dm_connector_state *dm_state)
2337{ 2337{
2338 struct drm_display_mode *preferred_mode = NULL; 2338 struct drm_display_mode *preferred_mode = NULL;
2339 const struct drm_connector *drm_connector; 2339 struct drm_connector *drm_connector;
2340 struct dc_stream_state *stream = NULL; 2340 struct dc_stream_state *stream = NULL;
2341 struct drm_display_mode mode = *drm_mode; 2341 struct drm_display_mode mode = *drm_mode;
2342 bool native_mode_found = false; 2342 bool native_mode_found = false;
@@ -2355,11 +2355,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
2355 2355
2356 if (!aconnector->dc_sink) { 2356 if (!aconnector->dc_sink) {
2357 /* 2357 /*
2358 * Exclude MST from creating fake_sink 2358 * Create dc_sink when necessary to MST
2359 * TODO: need to enable MST into fake_sink feature 2359 * Don't apply fake_sink to MST
2360 */ 2360 */
2361 if (aconnector->mst_port) 2361 if (aconnector->mst_port) {
2362 goto stream_create_fail; 2362 dm_dp_mst_dc_sink_create(drm_connector);
2363 goto mst_dc_sink_create_done;
2364 }
2363 2365
2364 if (create_fake_sink(aconnector)) 2366 if (create_fake_sink(aconnector))
2365 goto stream_create_fail; 2367 goto stream_create_fail;
@@ -2410,6 +2412,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
2410stream_create_fail: 2412stream_create_fail:
2411dm_state_null: 2413dm_state_null:
2412drm_connector_null: 2414drm_connector_null:
2415mst_dc_sink_create_done:
2413 return stream; 2416 return stream;
2414} 2417}
2415 2418
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 117521c6a6ed..0230250a1164 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -189,6 +189,8 @@ struct amdgpu_dm_connector {
189 struct mutex hpd_lock; 189 struct mutex hpd_lock;
190 190
191 bool fake_enable; 191 bool fake_enable;
192
193 bool mst_connected;
192}; 194};
193 195
194#define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) 196#define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index f8efb98b1fa7..638c2c2b5cd7 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -185,6 +185,42 @@ static int dm_connector_update_modes(struct drm_connector *connector,
185 return ret; 185 return ret;
186} 186}
187 187
188void dm_dp_mst_dc_sink_create(struct drm_connector *connector)
189{
190 struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
191 struct edid *edid;
192 struct dc_sink *dc_sink;
193 struct dc_sink_init_data init_params = {
194 .link = aconnector->dc_link,
195 .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
196
197 edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port);
198
199 if (!edid) {
200 drm_mode_connector_update_edid_property(
201 &aconnector->base,
202 NULL);
203 return;
204 }
205
206 aconnector->edid = edid;
207
208 dc_sink = dc_link_add_remote_sink(
209 aconnector->dc_link,
210 (uint8_t *)aconnector->edid,
211 (aconnector->edid->extensions + 1) * EDID_LENGTH,
212 &init_params);
213
214 dc_sink->priv = aconnector;
215 aconnector->dc_sink = dc_sink;
216
217 amdgpu_dm_add_sink_to_freesync_module(
218 connector, aconnector->edid);
219
220 drm_mode_connector_update_edid_property(
221 &aconnector->base, aconnector->edid);
222}
223
188static int dm_dp_mst_get_modes(struct drm_connector *connector) 224static int dm_dp_mst_get_modes(struct drm_connector *connector)
189{ 225{
190 struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); 226 struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
@@ -311,6 +347,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
311 drm_mode_connector_set_path_property(connector, pathprop); 347 drm_mode_connector_set_path_property(connector, pathprop);
312 348
313 drm_connector_list_iter_end(&conn_iter); 349 drm_connector_list_iter_end(&conn_iter);
350 aconnector->mst_connected = true;
314 return &aconnector->base; 351 return &aconnector->base;
315 } 352 }
316 } 353 }
@@ -363,6 +400,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
363 */ 400 */
364 amdgpu_dm_connector_funcs_reset(connector); 401 amdgpu_dm_connector_funcs_reset(connector);
365 402
403 aconnector->mst_connected = true;
404
366 DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n", 405 DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n",
367 aconnector, connector->base.id, aconnector->mst_port); 406 aconnector, connector->base.id, aconnector->mst_port);
368 407
@@ -394,6 +433,8 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
394 drm_mode_connector_update_edid_property( 433 drm_mode_connector_update_edid_property(
395 &aconnector->base, 434 &aconnector->base,
396 NULL); 435 NULL);
436
437 aconnector->mst_connected = false;
397} 438}
398 439
399static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) 440static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
@@ -404,10 +445,18 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
404 drm_kms_helper_hotplug_event(dev); 445 drm_kms_helper_hotplug_event(dev);
405} 446}
406 447
448static void dm_dp_mst_link_status_reset(struct drm_connector *connector)
449{
450 mutex_lock(&connector->dev->mode_config.mutex);
451 drm_mode_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD);
452 mutex_unlock(&connector->dev->mode_config.mutex);
453}
454
407static void dm_dp_mst_register_connector(struct drm_connector *connector) 455static void dm_dp_mst_register_connector(struct drm_connector *connector)
408{ 456{
409 struct drm_device *dev = connector->dev; 457 struct drm_device *dev = connector->dev;
410 struct amdgpu_device *adev = dev->dev_private; 458 struct amdgpu_device *adev = dev->dev_private;
459 struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
411 460
412 if (adev->mode_info.rfbdev) 461 if (adev->mode_info.rfbdev)
413 drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector); 462 drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector);
@@ -416,6 +465,8 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector)
416 465
417 drm_connector_register(connector); 466 drm_connector_register(connector);
418 467
468 if (aconnector->mst_connected)
469 dm_dp_mst_link_status_reset(connector);
419} 470}
420 471
421static const struct drm_dp_mst_topology_cbs dm_mst_cbs = { 472static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
index 2da851b40042..8cf51da26657 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
@@ -31,5 +31,6 @@ struct amdgpu_dm_connector;
31 31
32void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, 32void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
33 struct amdgpu_dm_connector *aconnector); 33 struct amdgpu_dm_connector *aconnector);
34void dm_dp_mst_dc_sink_create(struct drm_connector *connector);
34 35
35#endif 36#endif
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
index 3dce35e66b09..b142629a1058 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
@@ -900,6 +900,15 @@ bool dcn_validate_bandwidth(
900 v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps; 900 v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps;
901 v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c; 901 v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c;
902 v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c; 902 v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c;
903 /*
904 * Spreadsheet doesn't handle taps_c is one properly,
905 * need to force Chroma to always be scaled to pass
906 * bandwidth validation.
907 */
908 if (v->override_hta_pschroma[input_idx] == 1)
909 v->override_hta_pschroma[input_idx] = 2;
910 if (v->override_vta_pschroma[input_idx] == 1)
911 v->override_vta_pschroma[input_idx] = 2;
903 v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor; 912 v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor;
904 } 913 }
905 if (v->is_line_buffer_bpp_fixed == dcn_bw_yes) 914 if (v->is_line_buffer_bpp_fixed == dcn_bw_yes)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index e27ed4a45265..42a111b9505d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -1801,7 +1801,7 @@ static void disable_link(struct dc_link *link, enum signal_type signal)
1801 link->link_enc->funcs->disable_output(link->link_enc, signal, link); 1801 link->link_enc->funcs->disable_output(link->link_enc, signal, link);
1802} 1802}
1803 1803
1804bool dp_active_dongle_validate_timing( 1804static bool dp_active_dongle_validate_timing(
1805 const struct dc_crtc_timing *timing, 1805 const struct dc_crtc_timing *timing,
1806 const struct dc_dongle_caps *dongle_caps) 1806 const struct dc_dongle_caps *dongle_caps)
1807{ 1807{
@@ -1833,6 +1833,8 @@ bool dp_active_dongle_validate_timing(
1833 /* Check Color Depth and Pixel Clock */ 1833 /* Check Color Depth and Pixel Clock */
1834 if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) 1834 if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
1835 required_pix_clk /= 2; 1835 required_pix_clk /= 2;
1836 else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
1837 required_pix_clk = required_pix_clk * 2 / 3;
1836 1838
1837 switch (timing->display_color_depth) { 1839 switch (timing->display_color_depth) {
1838 case COLOR_DEPTH_666: 1840 case COLOR_DEPTH_666:
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 07ff8d2faf3f..d844fadcd56f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -2866,16 +2866,19 @@ static void dce110_apply_ctx_for_surface(
2866 int num_planes, 2866 int num_planes,
2867 struct dc_state *context) 2867 struct dc_state *context)
2868{ 2868{
2869 int i, be_idx; 2869 int i;
2870 2870
2871 if (num_planes == 0) 2871 if (num_planes == 0)
2872 return; 2872 return;
2873 2873
2874 be_idx = -1;
2875 for (i = 0; i < dc->res_pool->pipe_count; i++) { 2874 for (i = 0; i < dc->res_pool->pipe_count; i++) {
2876 if (stream == context->res_ctx.pipe_ctx[i].stream) { 2875 struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
2877 be_idx = context->res_ctx.pipe_ctx[i].stream_res.tg->inst; 2876 struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
2878 break; 2877
2878 if (stream == pipe_ctx->stream) {
2879 if (!pipe_ctx->top_pipe &&
2880 (pipe_ctx->plane_state || old_pipe_ctx->plane_state))
2881 dc->hwss.pipe_control_lock(dc, pipe_ctx, true);
2879 } 2882 }
2880 } 2883 }
2881 2884
@@ -2895,9 +2898,22 @@ static void dce110_apply_ctx_for_surface(
2895 context->stream_count); 2898 context->stream_count);
2896 2899
2897 dce110_program_front_end_for_pipe(dc, pipe_ctx); 2900 dce110_program_front_end_for_pipe(dc, pipe_ctx);
2901
2902 dc->hwss.update_plane_addr(dc, pipe_ctx);
2903
2898 program_surface_visibility(dc, pipe_ctx); 2904 program_surface_visibility(dc, pipe_ctx);
2899 2905
2900 } 2906 }
2907
2908 for (i = 0; i < dc->res_pool->pipe_count; i++) {
2909 struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
2910 struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
2911
2912 if ((stream == pipe_ctx->stream) &&
2913 (!pipe_ctx->top_pipe) &&
2914 (pipe_ctx->plane_state || old_pipe_ctx->plane_state))
2915 dc->hwss.pipe_control_lock(dc, pipe_ctx, false);
2916 }
2901} 2917}
2902 2918
2903static void dce110_power_down_fe(struct dc *dc, int fe_idx) 2919static void dce110_power_down_fe(struct dc *dc, int fe_idx)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
index 74e7c82bdc76..a9d55d0dd69e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
@@ -159,11 +159,10 @@ bool dpp_get_optimal_number_of_taps(
159 scl_data->taps.h_taps = 1; 159 scl_data->taps.h_taps = 1;
160 if (IDENTITY_RATIO(scl_data->ratios.vert)) 160 if (IDENTITY_RATIO(scl_data->ratios.vert))
161 scl_data->taps.v_taps = 1; 161 scl_data->taps.v_taps = 1;
162 /* 162 if (IDENTITY_RATIO(scl_data->ratios.horz_c))
163 * Spreadsheet doesn't handle taps_c is one properly, 163 scl_data->taps.h_taps_c = 1;
164 * need to force Chroma to always be scaled to pass 164 if (IDENTITY_RATIO(scl_data->ratios.vert_c))
165 * bandwidth validation. 165 scl_data->taps.v_taps_c = 1;
166 */
167 } 166 }
168 167
169 return true; 168 return true;
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 482014137953..9ae236036e32 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -152,14 +152,23 @@ static void drm_connector_free(struct kref *kref)
152 connector->funcs->destroy(connector); 152 connector->funcs->destroy(connector);
153} 153}
154 154
155static void drm_connector_free_work_fn(struct work_struct *work) 155void drm_connector_free_work_fn(struct work_struct *work)
156{ 156{
157 struct drm_connector *connector = 157 struct drm_connector *connector, *n;
158 container_of(work, struct drm_connector, free_work); 158 struct drm_device *dev =
159 struct drm_device *dev = connector->dev; 159 container_of(work, struct drm_device, mode_config.connector_free_work);
160 struct drm_mode_config *config = &dev->mode_config;
161 unsigned long flags;
162 struct llist_node *freed;
160 163
161 drm_mode_object_unregister(dev, &connector->base); 164 spin_lock_irqsave(&config->connector_list_lock, flags);
162 connector->funcs->destroy(connector); 165 freed = llist_del_all(&config->connector_free_list);
166 spin_unlock_irqrestore(&config->connector_list_lock, flags);
167
168 llist_for_each_entry_safe(connector, n, freed, free_node) {
169 drm_mode_object_unregister(dev, &connector->base);
170 connector->funcs->destroy(connector);
171 }
163} 172}
164 173
165/** 174/**
@@ -191,8 +200,6 @@ int drm_connector_init(struct drm_device *dev,
191 if (ret) 200 if (ret)
192 return ret; 201 return ret;
193 202
194 INIT_WORK(&connector->free_work, drm_connector_free_work_fn);
195
196 connector->base.properties = &connector->properties; 203 connector->base.properties = &connector->properties;
197 connector->dev = dev; 204 connector->dev = dev;
198 connector->funcs = funcs; 205 connector->funcs = funcs;
@@ -547,10 +554,17 @@ EXPORT_SYMBOL(drm_connector_list_iter_begin);
547 * actually release the connector when dropping our final reference. 554 * actually release the connector when dropping our final reference.
548 */ 555 */
549static void 556static void
550drm_connector_put_safe(struct drm_connector *conn) 557__drm_connector_put_safe(struct drm_connector *conn)
551{ 558{
552 if (refcount_dec_and_test(&conn->base.refcount.refcount)) 559 struct drm_mode_config *config = &conn->dev->mode_config;
553 schedule_work(&conn->free_work); 560
561 lockdep_assert_held(&config->connector_list_lock);
562
563 if (!refcount_dec_and_test(&conn->base.refcount.refcount))
564 return;
565
566 llist_add(&conn->free_node, &config->connector_free_list);
567 schedule_work(&config->connector_free_work);
554} 568}
555 569
556/** 570/**
@@ -582,10 +596,10 @@ drm_connector_list_iter_next(struct drm_connector_list_iter *iter)
582 596
583 /* loop until it's not a zombie connector */ 597 /* loop until it's not a zombie connector */
584 } while (!kref_get_unless_zero(&iter->conn->base.refcount)); 598 } while (!kref_get_unless_zero(&iter->conn->base.refcount));
585 spin_unlock_irqrestore(&config->connector_list_lock, flags);
586 599
587 if (old_conn) 600 if (old_conn)
588 drm_connector_put_safe(old_conn); 601 __drm_connector_put_safe(old_conn);
602 spin_unlock_irqrestore(&config->connector_list_lock, flags);
589 603
590 return iter->conn; 604 return iter->conn;
591} 605}
@@ -602,9 +616,15 @@ EXPORT_SYMBOL(drm_connector_list_iter_next);
602 */ 616 */
603void drm_connector_list_iter_end(struct drm_connector_list_iter *iter) 617void drm_connector_list_iter_end(struct drm_connector_list_iter *iter)
604{ 618{
619 struct drm_mode_config *config = &iter->dev->mode_config;
620 unsigned long flags;
621
605 iter->dev = NULL; 622 iter->dev = NULL;
606 if (iter->conn) 623 if (iter->conn) {
607 drm_connector_put_safe(iter->conn); 624 spin_lock_irqsave(&config->connector_list_lock, flags);
625 __drm_connector_put_safe(iter->conn);
626 spin_unlock_irqrestore(&config->connector_list_lock, flags);
627 }
608 lock_release(&connector_list_iter_dep_map, 0, _RET_IP_); 628 lock_release(&connector_list_iter_dep_map, 0, _RET_IP_);
609} 629}
610EXPORT_SYMBOL(drm_connector_list_iter_end); 630EXPORT_SYMBOL(drm_connector_list_iter_end);
@@ -1231,6 +1251,19 @@ int drm_mode_connector_update_edid_property(struct drm_connector *connector,
1231 if (edid) 1251 if (edid)
1232 size = EDID_LENGTH * (1 + edid->extensions); 1252 size = EDID_LENGTH * (1 + edid->extensions);
1233 1253
1254 /* Set the display info, using edid if available, otherwise
1255 * reseting the values to defaults. This duplicates the work
1256 * done in drm_add_edid_modes, but that function is not
1257 * consistently called before this one in all drivers and the
1258 * computation is cheap enough that it seems better to
1259 * duplicate it rather than attempt to ensure some arbitrary
1260 * ordering of calls.
1261 */
1262 if (edid)
1263 drm_add_display_info(connector, edid);
1264 else
1265 drm_reset_display_info(connector);
1266
1234 drm_object_property_set_value(&connector->base, 1267 drm_object_property_set_value(&connector->base,
1235 dev->mode_config.non_desktop_property, 1268 dev->mode_config.non_desktop_property,
1236 connector->display_info.non_desktop); 1269 connector->display_info.non_desktop);
diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h
index 9ebb8841778c..af00f42ba269 100644
--- a/drivers/gpu/drm/drm_crtc_internal.h
+++ b/drivers/gpu/drm/drm_crtc_internal.h
@@ -142,6 +142,7 @@ int drm_mode_connector_set_obj_prop(struct drm_mode_object *obj,
142 uint64_t value); 142 uint64_t value);
143int drm_connector_create_standard_properties(struct drm_device *dev); 143int drm_connector_create_standard_properties(struct drm_device *dev);
144const char *drm_get_connector_force_name(enum drm_connector_force force); 144const char *drm_get_connector_force_name(enum drm_connector_force force);
145void drm_connector_free_work_fn(struct work_struct *work);
145 146
146/* IOCTL */ 147/* IOCTL */
147int drm_mode_connector_property_set_ioctl(struct drm_device *dev, 148int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 5dfe14763871..cb487148359a 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -1731,7 +1731,7 @@ EXPORT_SYMBOL(drm_edid_duplicate);
1731 * 1731 *
1732 * Returns true if @vendor is in @edid, false otherwise 1732 * Returns true if @vendor is in @edid, false otherwise
1733 */ 1733 */
1734static bool edid_vendor(struct edid *edid, const char *vendor) 1734static bool edid_vendor(const struct edid *edid, const char *vendor)
1735{ 1735{
1736 char edid_vendor[3]; 1736 char edid_vendor[3];
1737 1737
@@ -1749,7 +1749,7 @@ static bool edid_vendor(struct edid *edid, const char *vendor)
1749 * 1749 *
1750 * This tells subsequent routines what fixes they need to apply. 1750 * This tells subsequent routines what fixes they need to apply.
1751 */ 1751 */
1752static u32 edid_get_quirks(struct edid *edid) 1752static u32 edid_get_quirks(const struct edid *edid)
1753{ 1753{
1754 const struct edid_quirk *quirk; 1754 const struct edid_quirk *quirk;
1755 int i; 1755 int i;
@@ -2813,7 +2813,7 @@ add_detailed_modes(struct drm_connector *connector, struct edid *edid,
2813/* 2813/*
2814 * Search EDID for CEA extension block. 2814 * Search EDID for CEA extension block.
2815 */ 2815 */
2816static u8 *drm_find_edid_extension(struct edid *edid, int ext_id) 2816static u8 *drm_find_edid_extension(const struct edid *edid, int ext_id)
2817{ 2817{
2818 u8 *edid_ext = NULL; 2818 u8 *edid_ext = NULL;
2819 int i; 2819 int i;
@@ -2835,12 +2835,12 @@ static u8 *drm_find_edid_extension(struct edid *edid, int ext_id)
2835 return edid_ext; 2835 return edid_ext;
2836} 2836}
2837 2837
2838static u8 *drm_find_cea_extension(struct edid *edid) 2838static u8 *drm_find_cea_extension(const struct edid *edid)
2839{ 2839{
2840 return drm_find_edid_extension(edid, CEA_EXT); 2840 return drm_find_edid_extension(edid, CEA_EXT);
2841} 2841}
2842 2842
2843static u8 *drm_find_displayid_extension(struct edid *edid) 2843static u8 *drm_find_displayid_extension(const struct edid *edid)
2844{ 2844{
2845 return drm_find_edid_extension(edid, DISPLAYID_EXT); 2845 return drm_find_edid_extension(edid, DISPLAYID_EXT);
2846} 2846}
@@ -4363,7 +4363,7 @@ drm_parse_hdmi_vsdb_video(struct drm_connector *connector, const u8 *db)
4363} 4363}
4364 4364
4365static void drm_parse_cea_ext(struct drm_connector *connector, 4365static void drm_parse_cea_ext(struct drm_connector *connector,
4366 struct edid *edid) 4366 const struct edid *edid)
4367{ 4367{
4368 struct drm_display_info *info = &connector->display_info; 4368 struct drm_display_info *info = &connector->display_info;
4369 const u8 *edid_ext; 4369 const u8 *edid_ext;
@@ -4397,11 +4397,33 @@ static void drm_parse_cea_ext(struct drm_connector *connector,
4397 } 4397 }
4398} 4398}
4399 4399
4400static void drm_add_display_info(struct drm_connector *connector, 4400/* A connector has no EDID information, so we've got no EDID to compute quirks from. Reset
4401 struct edid *edid, u32 quirks) 4401 * all of the values which would have been set from EDID
4402 */
4403void
4404drm_reset_display_info(struct drm_connector *connector)
4402{ 4405{
4403 struct drm_display_info *info = &connector->display_info; 4406 struct drm_display_info *info = &connector->display_info;
4404 4407
4408 info->width_mm = 0;
4409 info->height_mm = 0;
4410
4411 info->bpc = 0;
4412 info->color_formats = 0;
4413 info->cea_rev = 0;
4414 info->max_tmds_clock = 0;
4415 info->dvi_dual = false;
4416
4417 info->non_desktop = 0;
4418}
4419EXPORT_SYMBOL_GPL(drm_reset_display_info);
4420
4421u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid)
4422{
4423 struct drm_display_info *info = &connector->display_info;
4424
4425 u32 quirks = edid_get_quirks(edid);
4426
4405 info->width_mm = edid->width_cm * 10; 4427 info->width_mm = edid->width_cm * 10;
4406 info->height_mm = edid->height_cm * 10; 4428 info->height_mm = edid->height_cm * 10;
4407 4429
@@ -4414,11 +4436,13 @@ static void drm_add_display_info(struct drm_connector *connector,
4414 4436
4415 info->non_desktop = !!(quirks & EDID_QUIRK_NON_DESKTOP); 4437 info->non_desktop = !!(quirks & EDID_QUIRK_NON_DESKTOP);
4416 4438
4439 DRM_DEBUG_KMS("non_desktop set to %d\n", info->non_desktop);
4440
4417 if (edid->revision < 3) 4441 if (edid->revision < 3)
4418 return; 4442 return quirks;
4419 4443
4420 if (!(edid->input & DRM_EDID_INPUT_DIGITAL)) 4444 if (!(edid->input & DRM_EDID_INPUT_DIGITAL))
4421 return; 4445 return quirks;
4422 4446
4423 drm_parse_cea_ext(connector, edid); 4447 drm_parse_cea_ext(connector, edid);
4424 4448
@@ -4438,7 +4462,7 @@ static void drm_add_display_info(struct drm_connector *connector,
4438 4462
4439 /* Only defined for 1.4 with digital displays */ 4463 /* Only defined for 1.4 with digital displays */
4440 if (edid->revision < 4) 4464 if (edid->revision < 4)
4441 return; 4465 return quirks;
4442 4466
4443 switch (edid->input & DRM_EDID_DIGITAL_DEPTH_MASK) { 4467 switch (edid->input & DRM_EDID_DIGITAL_DEPTH_MASK) {
4444 case DRM_EDID_DIGITAL_DEPTH_6: 4468 case DRM_EDID_DIGITAL_DEPTH_6:
@@ -4473,7 +4497,9 @@ static void drm_add_display_info(struct drm_connector *connector,
4473 info->color_formats |= DRM_COLOR_FORMAT_YCRCB444; 4497 info->color_formats |= DRM_COLOR_FORMAT_YCRCB444;
4474 if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB422) 4498 if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB422)
4475 info->color_formats |= DRM_COLOR_FORMAT_YCRCB422; 4499 info->color_formats |= DRM_COLOR_FORMAT_YCRCB422;
4500 return quirks;
4476} 4501}
4502EXPORT_SYMBOL_GPL(drm_add_display_info);
4477 4503
4478static int validate_displayid(u8 *displayid, int length, int idx) 4504static int validate_displayid(u8 *displayid, int length, int idx)
4479{ 4505{
@@ -4627,14 +4653,12 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
4627 return 0; 4653 return 0;
4628 } 4654 }
4629 4655
4630 quirks = edid_get_quirks(edid);
4631
4632 /* 4656 /*
4633 * CEA-861-F adds ycbcr capability map block, for HDMI 2.0 sinks. 4657 * CEA-861-F adds ycbcr capability map block, for HDMI 2.0 sinks.
4634 * To avoid multiple parsing of same block, lets parse that map 4658 * To avoid multiple parsing of same block, lets parse that map
4635 * from sink info, before parsing CEA modes. 4659 * from sink info, before parsing CEA modes.
4636 */ 4660 */
4637 drm_add_display_info(connector, edid, quirks); 4661 quirks = drm_add_display_info(connector, edid);
4638 4662
4639 /* 4663 /*
4640 * EDID spec says modes should be preferred in this order: 4664 * EDID spec says modes should be preferred in this order:
diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index d1eb56a1eff4..1402c0e71b03 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -220,17 +220,6 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
220 220
221 mutex_lock(&dev->mode_config.idr_mutex); 221 mutex_lock(&dev->mode_config.idr_mutex);
222 222
223 /* Insert the new lessee into the tree */
224 id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
225 if (id < 0) {
226 error = id;
227 goto out_lessee;
228 }
229
230 lessee->lessee_id = id;
231 lessee->lessor = drm_master_get(lessor);
232 list_add_tail(&lessee->lessee_list, &lessor->lessees);
233
234 idr_for_each_entry(leases, entry, object) { 223 idr_for_each_entry(leases, entry, object) {
235 error = 0; 224 error = 0;
236 if (!idr_find(&dev->mode_config.crtc_idr, object)) 225 if (!idr_find(&dev->mode_config.crtc_idr, object))
@@ -246,6 +235,17 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
246 } 235 }
247 } 236 }
248 237
238 /* Insert the new lessee into the tree */
239 id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
240 if (id < 0) {
241 error = id;
242 goto out_lessee;
243 }
244
245 lessee->lessee_id = id;
246 lessee->lessor = drm_master_get(lessor);
247 list_add_tail(&lessee->lessee_list, &lessor->lessees);
248
249 /* Move the leases over */ 249 /* Move the leases over */
250 lessee->leases = *leases; 250 lessee->leases = *leases;
251 DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor); 251 DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor);
@@ -254,10 +254,10 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
254 return lessee; 254 return lessee;
255 255
256out_lessee: 256out_lessee:
257 drm_master_put(&lessee);
258
259 mutex_unlock(&dev->mode_config.idr_mutex); 257 mutex_unlock(&dev->mode_config.idr_mutex);
260 258
259 drm_master_put(&lessee);
260
261 return ERR_PTR(error); 261 return ERR_PTR(error);
262} 262}
263 263
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 61a1c8ea74bc..c3c79ee6119e 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -575,21 +575,23 @@ EXPORT_SYMBOL(drm_mm_remove_node);
575 */ 575 */
576void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new) 576void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
577{ 577{
578 struct drm_mm *mm = old->mm;
579
578 DRM_MM_BUG_ON(!old->allocated); 580 DRM_MM_BUG_ON(!old->allocated);
579 581
580 *new = *old; 582 *new = *old;
581 583
582 list_replace(&old->node_list, &new->node_list); 584 list_replace(&old->node_list, &new->node_list);
583 rb_replace_node(&old->rb, &new->rb, &old->mm->interval_tree.rb_root); 585 rb_replace_node_cached(&old->rb, &new->rb, &mm->interval_tree);
584 586
585 if (drm_mm_hole_follows(old)) { 587 if (drm_mm_hole_follows(old)) {
586 list_replace(&old->hole_stack, &new->hole_stack); 588 list_replace(&old->hole_stack, &new->hole_stack);
587 rb_replace_node(&old->rb_hole_size, 589 rb_replace_node(&old->rb_hole_size,
588 &new->rb_hole_size, 590 &new->rb_hole_size,
589 &old->mm->holes_size); 591 &mm->holes_size);
590 rb_replace_node(&old->rb_hole_addr, 592 rb_replace_node(&old->rb_hole_addr,
591 &new->rb_hole_addr, 593 &new->rb_hole_addr,
592 &old->mm->holes_addr); 594 &mm->holes_addr);
593 } 595 }
594 596
595 old->allocated = false; 597 old->allocated = false;
diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
index cc78b3d9e5e4..256de7313612 100644
--- a/drivers/gpu/drm/drm_mode_config.c
+++ b/drivers/gpu/drm/drm_mode_config.c
@@ -382,6 +382,9 @@ void drm_mode_config_init(struct drm_device *dev)
382 ida_init(&dev->mode_config.connector_ida); 382 ida_init(&dev->mode_config.connector_ida);
383 spin_lock_init(&dev->mode_config.connector_list_lock); 383 spin_lock_init(&dev->mode_config.connector_list_lock);
384 384
385 init_llist_head(&dev->mode_config.connector_free_list);
386 INIT_WORK(&dev->mode_config.connector_free_work, drm_connector_free_work_fn);
387
385 drm_mode_create_standard_properties(dev); 388 drm_mode_create_standard_properties(dev);
386 389
387 /* Just to be sure */ 390 /* Just to be sure */
@@ -432,7 +435,7 @@ void drm_mode_config_cleanup(struct drm_device *dev)
432 } 435 }
433 drm_connector_list_iter_end(&conn_iter); 436 drm_connector_list_iter_end(&conn_iter);
434 /* connector_iter drops references in a work item. */ 437 /* connector_iter drops references in a work item. */
435 flush_scheduled_work(); 438 flush_work(&dev->mode_config.connector_free_work);
436 if (WARN_ON(!list_empty(&dev->mode_config.connector_list))) { 439 if (WARN_ON(!list_empty(&dev->mode_config.connector_list))) {
437 drm_connector_list_iter_begin(dev, &conn_iter); 440 drm_connector_list_iter_begin(dev, &conn_iter);
438 drm_for_each_connector_iter(connector, &conn_iter) 441 drm_for_each_connector_iter(connector, &conn_iter)
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index 37a93cdffb4a..2c90519576a3 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -558,11 +558,10 @@ int drm_plane_check_pixel_format(const struct drm_plane *plane, u32 format)
558} 558}
559 559
560/* 560/*
561 * setplane_internal - setplane handler for internal callers 561 * __setplane_internal - setplane handler for internal callers
562 * 562 *
563 * Note that we assume an extra reference has already been taken on fb. If the 563 * This function will take a reference on the new fb for the plane
564 * update fails, this reference will be dropped before return; if it succeeds, 564 * on success.
565 * the previous framebuffer (if any) will be unreferenced instead.
566 * 565 *
567 * src_{x,y,w,h} are provided in 16.16 fixed point format 566 * src_{x,y,w,h} are provided in 16.16 fixed point format
568 */ 567 */
@@ -630,14 +629,12 @@ static int __setplane_internal(struct drm_plane *plane,
630 if (!ret) { 629 if (!ret) {
631 plane->crtc = crtc; 630 plane->crtc = crtc;
632 plane->fb = fb; 631 plane->fb = fb;
633 fb = NULL; 632 drm_framebuffer_get(plane->fb);
634 } else { 633 } else {
635 plane->old_fb = NULL; 634 plane->old_fb = NULL;
636 } 635 }
637 636
638out: 637out:
639 if (fb)
640 drm_framebuffer_put(fb);
641 if (plane->old_fb) 638 if (plane->old_fb)
642 drm_framebuffer_put(plane->old_fb); 639 drm_framebuffer_put(plane->old_fb);
643 plane->old_fb = NULL; 640 plane->old_fb = NULL;
@@ -685,6 +682,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
685 struct drm_plane *plane; 682 struct drm_plane *plane;
686 struct drm_crtc *crtc = NULL; 683 struct drm_crtc *crtc = NULL;
687 struct drm_framebuffer *fb = NULL; 684 struct drm_framebuffer *fb = NULL;
685 int ret;
688 686
689 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 687 if (!drm_core_check_feature(dev, DRIVER_MODESET))
690 return -EINVAL; 688 return -EINVAL;
@@ -717,15 +715,16 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
717 } 715 }
718 } 716 }
719 717
720 /* 718 ret = setplane_internal(plane, crtc, fb,
721 * setplane_internal will take care of deref'ing either the old or new 719 plane_req->crtc_x, plane_req->crtc_y,
722 * framebuffer depending on success. 720 plane_req->crtc_w, plane_req->crtc_h,
723 */ 721 plane_req->src_x, plane_req->src_y,
724 return setplane_internal(plane, crtc, fb, 722 plane_req->src_w, plane_req->src_h);
725 plane_req->crtc_x, plane_req->crtc_y, 723
726 plane_req->crtc_w, plane_req->crtc_h, 724 if (fb)
727 plane_req->src_x, plane_req->src_y, 725 drm_framebuffer_put(fb);
728 plane_req->src_w, plane_req->src_h); 726
727 return ret;
729} 728}
730 729
731static int drm_mode_cursor_universal(struct drm_crtc *crtc, 730static int drm_mode_cursor_universal(struct drm_crtc *crtc,
@@ -788,13 +787,12 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc,
788 src_h = fb->height << 16; 787 src_h = fb->height << 16;
789 } 788 }
790 789
791 /*
792 * setplane_internal will take care of deref'ing either the old or new
793 * framebuffer depending on success.
794 */
795 ret = __setplane_internal(crtc->cursor, crtc, fb, 790 ret = __setplane_internal(crtc->cursor, crtc, fb,
796 crtc_x, crtc_y, crtc_w, crtc_h, 791 crtc_x, crtc_y, crtc_w, crtc_h,
797 0, 0, src_w, src_h, ctx); 792 0, 0, src_w, src_h, ctx);
793
794 if (fb)
795 drm_framebuffer_put(fb);
798 796
799 /* Update successful; save new cursor position, if necessary */ 797 /* Update successful; save new cursor position, if necessary */
800 if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) { 798 if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) {
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index f776fc1cc543..cb4d09c70fd4 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -369,40 +369,26 @@ static const struct file_operations drm_syncobj_file_fops = {
369 .release = drm_syncobj_file_release, 369 .release = drm_syncobj_file_release,
370}; 370};
371 371
372static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj)
373{
374 struct file *file = anon_inode_getfile("syncobj_file",
375 &drm_syncobj_file_fops,
376 syncobj, 0);
377 if (IS_ERR(file))
378 return PTR_ERR(file);
379
380 drm_syncobj_get(syncobj);
381 if (cmpxchg(&syncobj->file, NULL, file)) {
382 /* lost the race */
383 fput(file);
384 }
385
386 return 0;
387}
388
389int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd) 372int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd)
390{ 373{
391 int ret; 374 struct file *file;
392 int fd; 375 int fd;
393 376
394 fd = get_unused_fd_flags(O_CLOEXEC); 377 fd = get_unused_fd_flags(O_CLOEXEC);
395 if (fd < 0) 378 if (fd < 0)
396 return fd; 379 return fd;
397 380
398 if (!syncobj->file) { 381 file = anon_inode_getfile("syncobj_file",
399 ret = drm_syncobj_alloc_file(syncobj); 382 &drm_syncobj_file_fops,
400 if (ret) { 383 syncobj, 0);
401 put_unused_fd(fd); 384 if (IS_ERR(file)) {
402 return ret; 385 put_unused_fd(fd);
403 } 386 return PTR_ERR(file);
404 } 387 }
405 fd_install(fd, syncobj->file); 388
389 drm_syncobj_get(syncobj);
390 fd_install(fd, file);
391
406 *p_fd = fd; 392 *p_fd = fd;
407 return 0; 393 return 0;
408} 394}
@@ -422,31 +408,24 @@ static int drm_syncobj_handle_to_fd(struct drm_file *file_private,
422 return ret; 408 return ret;
423} 409}
424 410
425static struct drm_syncobj *drm_syncobj_fdget(int fd)
426{
427 struct file *file = fget(fd);
428
429 if (!file)
430 return NULL;
431 if (file->f_op != &drm_syncobj_file_fops)
432 goto err;
433
434 return file->private_data;
435err:
436 fput(file);
437 return NULL;
438};
439
440static int drm_syncobj_fd_to_handle(struct drm_file *file_private, 411static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
441 int fd, u32 *handle) 412 int fd, u32 *handle)
442{ 413{
443 struct drm_syncobj *syncobj = drm_syncobj_fdget(fd); 414 struct drm_syncobj *syncobj;
415 struct file *file;
444 int ret; 416 int ret;
445 417
446 if (!syncobj) 418 file = fget(fd);
419 if (!file)
447 return -EINVAL; 420 return -EINVAL;
448 421
422 if (file->f_op != &drm_syncobj_file_fops) {
423 fput(file);
424 return -EINVAL;
425 }
426
449 /* take a reference to put in the idr */ 427 /* take a reference to put in the idr */
428 syncobj = file->private_data;
450 drm_syncobj_get(syncobj); 429 drm_syncobj_get(syncobj);
451 430
452 idr_preload(GFP_KERNEL); 431 idr_preload(GFP_KERNEL);
@@ -455,12 +434,14 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
455 spin_unlock(&file_private->syncobj_table_lock); 434 spin_unlock(&file_private->syncobj_table_lock);
456 idr_preload_end(); 435 idr_preload_end();
457 436
458 if (ret < 0) { 437 if (ret > 0) {
459 fput(syncobj->file); 438 *handle = ret;
460 return ret; 439 ret = 0;
461 } 440 } else
462 *handle = ret; 441 drm_syncobj_put(syncobj);
463 return 0; 442
443 fput(file);
444 return ret;
464} 445}
465 446
466static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private, 447static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private,
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index 355120865efd..309f3fa6794a 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -266,6 +266,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
266 /* Clear host CRT status, so guest couldn't detect this host CRT. */ 266 /* Clear host CRT status, so guest couldn't detect this host CRT. */
267 if (IS_BROADWELL(dev_priv)) 267 if (IS_BROADWELL(dev_priv))
268 vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; 268 vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK;
269
270 vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE;
269} 271}
270 272
271static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) 273static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num)
@@ -282,7 +284,6 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num)
282static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, 284static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
283 int type, unsigned int resolution) 285 int type, unsigned int resolution)
284{ 286{
285 struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
286 struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); 287 struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
287 288
288 if (WARN_ON(resolution >= GVT_EDID_NUM)) 289 if (WARN_ON(resolution >= GVT_EDID_NUM))
@@ -308,7 +309,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
308 port->type = type; 309 port->type = type;
309 310
310 emulate_monitor_status_change(vgpu); 311 emulate_monitor_status_change(vgpu);
311 vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; 312
312 return 0; 313 return 0;
313} 314}
314 315
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ad4050f7ab3b..18de6569d04a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
330 * must wait for all rendering to complete to the object (as unbinding 330 * must wait for all rendering to complete to the object (as unbinding
331 * must anyway), and retire the requests. 331 * must anyway), and retire the requests.
332 */ 332 */
333 ret = i915_gem_object_wait(obj, 333 ret = i915_gem_object_set_to_cpu_domain(obj, false);
334 I915_WAIT_INTERRUPTIBLE |
335 I915_WAIT_LOCKED |
336 I915_WAIT_ALL,
337 MAX_SCHEDULE_TIMEOUT,
338 NULL);
339 if (ret) 334 if (ret)
340 return ret; 335 return ret;
341 336
342 i915_gem_retire_requests(to_i915(obj->base.dev));
343
344 while ((vma = list_first_entry_or_null(&obj->vma_list, 337 while ((vma = list_first_entry_or_null(&obj->vma_list,
345 struct i915_vma, 338 struct i915_vma,
346 obj_link))) { 339 obj_link))) {
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index e8ca67a129d2..ac236b88c99c 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -367,6 +367,7 @@ struct i915_sw_dma_fence_cb {
367 struct dma_fence *dma; 367 struct dma_fence *dma;
368 struct timer_list timer; 368 struct timer_list timer;
369 struct irq_work work; 369 struct irq_work work;
370 struct rcu_head rcu;
370}; 371};
371 372
372static void timer_i915_sw_fence_wake(struct timer_list *t) 373static void timer_i915_sw_fence_wake(struct timer_list *t)
@@ -406,7 +407,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk)
406 del_timer_sync(&cb->timer); 407 del_timer_sync(&cb->timer);
407 dma_fence_put(cb->dma); 408 dma_fence_put(cb->dma);
408 409
409 kfree(cb); 410 kfree_rcu(cb, rcu);
410} 411}
411 412
412int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, 413int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 5f8b9f1f40f1..bcbc7abe6693 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -186,7 +186,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
186 struct intel_wait *wait, *n, *first; 186 struct intel_wait *wait, *n, *first;
187 187
188 if (!b->irq_armed) 188 if (!b->irq_armed)
189 return; 189 goto wakeup_signaler;
190 190
191 /* We only disarm the irq when we are idle (all requests completed), 191 /* We only disarm the irq when we are idle (all requests completed),
192 * so if the bottom-half remains asleep, it missed the request 192 * so if the bottom-half remains asleep, it missed the request
@@ -208,6 +208,14 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
208 b->waiters = RB_ROOT; 208 b->waiters = RB_ROOT;
209 209
210 spin_unlock_irq(&b->rb_lock); 210 spin_unlock_irq(&b->rb_lock);
211
212 /*
213 * The signaling thread may be asleep holding a reference to a request,
214 * that had its signaling cancelled prior to being preempted. We need
215 * to kick the signaler, just in case, to release any such reference.
216 */
217wakeup_signaler:
218 wake_up_process(b->signaler);
211} 219}
212 220
213static bool use_fake_irq(const struct intel_breadcrumbs *b) 221static bool use_fake_irq(const struct intel_breadcrumbs *b)
@@ -651,23 +659,15 @@ static int intel_breadcrumbs_signaler(void *arg)
651 } 659 }
652 660
653 if (unlikely(do_schedule)) { 661 if (unlikely(do_schedule)) {
654 DEFINE_WAIT(exec);
655
656 if (kthread_should_park()) 662 if (kthread_should_park())
657 kthread_parkme(); 663 kthread_parkme();
658 664
659 if (kthread_should_stop()) { 665 if (unlikely(kthread_should_stop())) {
660 GEM_BUG_ON(request); 666 i915_gem_request_put(request);
661 break; 667 break;
662 } 668 }
663 669
664 if (request)
665 add_wait_queue(&request->execute, &exec);
666
667 schedule(); 670 schedule();
668
669 if (request)
670 remove_wait_queue(&request->execute, &exec);
671 } 671 }
672 i915_gem_request_put(request); 672 i915_gem_request_put(request);
673 } while (1); 673 } while (1);
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index e0843bb99169..58a3755544b2 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -2128,6 +2128,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
2128 if (WARN_ON(!pll)) 2128 if (WARN_ON(!pll))
2129 return; 2129 return;
2130 2130
2131 mutex_lock(&dev_priv->dpll_lock);
2132
2131 if (IS_CANNONLAKE(dev_priv)) { 2133 if (IS_CANNONLAKE(dev_priv)) {
2132 /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ 2134 /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */
2133 val = I915_READ(DPCLKA_CFGCR0); 2135 val = I915_READ(DPCLKA_CFGCR0);
@@ -2157,6 +2159,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
2157 } else if (INTEL_INFO(dev_priv)->gen < 9) { 2159 } else if (INTEL_INFO(dev_priv)->gen < 9) {
2158 I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll)); 2160 I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll));
2159 } 2161 }
2162
2163 mutex_unlock(&dev_priv->dpll_lock);
2160} 2164}
2161 2165
2162static void intel_ddi_clk_disable(struct intel_encoder *encoder) 2166static void intel_ddi_clk_disable(struct intel_encoder *encoder)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e8ccf89cb17b..30cf273d57aa 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9944,11 +9944,10 @@ found:
9944 } 9944 }
9945 9945
9946 ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0); 9946 ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0);
9947 drm_framebuffer_put(fb);
9947 if (ret) 9948 if (ret)
9948 goto fail; 9949 goto fail;
9949 9950
9950 drm_framebuffer_put(fb);
9951
9952 ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); 9951 ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode);
9953 if (ret) 9952 if (ret)
9954 goto fail; 9953 goto fail;
@@ -13195,7 +13194,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
13195 primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe); 13194 primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe);
13196 primary->check_plane = intel_check_primary_plane; 13195 primary->check_plane = intel_check_primary_plane;
13197 13196
13198 if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { 13197 if (INTEL_GEN(dev_priv) >= 10) {
13199 intel_primary_formats = skl_primary_formats; 13198 intel_primary_formats = skl_primary_formats;
13200 num_formats = ARRAY_SIZE(skl_primary_formats); 13199 num_formats = ARRAY_SIZE(skl_primary_formats);
13201 modifiers = skl_format_modifiers_ccs; 13200 modifiers = skl_format_modifiers_ccs;
diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c
index 3bf65288ffff..5809b29044fc 100644
--- a/drivers/gpu/drm/i915/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/intel_lpe_audio.c
@@ -193,7 +193,7 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv)
193 }; 193 };
194 194
195 if (!pci_dev_present(atom_hdaudio_ids)) { 195 if (!pci_dev_present(atom_hdaudio_ids)) {
196 DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n"); 196 DRM_INFO("HDaudio controller not detected, using LPE audio instead\n");
197 lpe_present = true; 197 lpe_present = true;
198 } 198 }
199 } 199 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 2615912430cc..435ff8662cfa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -224,7 +224,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
224 /* Determine if we can get a cache-coherent map, forcing 224 /* Determine if we can get a cache-coherent map, forcing
225 * uncached mapping if we can't. 225 * uncached mapping if we can't.
226 */ 226 */
227 if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) 227 if (!nouveau_drm_use_coherent_gpu_mapping(drm))
228 nvbo->force_coherent = true; 228 nvbo->force_coherent = true;
229 } 229 }
230 230
@@ -262,7 +262,8 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
262 if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE && 262 if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
263 (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram) 263 (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram)
264 continue; 264 continue;
265 if ((flags & TTM_PL_FLAG_TT ) && !vmm->page[i].host) 265 if ((flags & TTM_PL_FLAG_TT) &&
266 (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
266 continue; 267 continue;
267 268
268 /* Select this page size if it's the first that supports 269 /* Select this page size if it's the first that supports
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 8d4a5be3b913..56fe261b6268 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -152,9 +152,9 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence,
152 work->cli = cli; 152 work->cli = cli;
153 mutex_lock(&cli->lock); 153 mutex_lock(&cli->lock);
154 list_add_tail(&work->head, &cli->worker); 154 list_add_tail(&work->head, &cli->worker);
155 mutex_unlock(&cli->lock);
156 if (dma_fence_add_callback(fence, &work->cb, nouveau_cli_work_fence)) 155 if (dma_fence_add_callback(fence, &work->cb, nouveau_cli_work_fence))
157 nouveau_cli_work_fence(fence, &work->cb); 156 nouveau_cli_work_fence(fence, &work->cb);
157 mutex_unlock(&cli->lock);
158} 158}
159 159
160static void 160static void
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 3331e82ae9e7..96f6bd8aee5d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -157,8 +157,8 @@ struct nouveau_drm {
157 struct nvif_object copy; 157 struct nvif_object copy;
158 int mtrr; 158 int mtrr;
159 int type_vram; 159 int type_vram;
160 int type_host; 160 int type_host[2];
161 int type_ncoh; 161 int type_ncoh[2];
162 } ttm; 162 } ttm;
163 163
164 /* GEM interface support */ 164 /* GEM interface support */
@@ -217,6 +217,13 @@ nouveau_drm(struct drm_device *dev)
217 return dev->dev_private; 217 return dev->dev_private;
218} 218}
219 219
220static inline bool
221nouveau_drm_use_coherent_gpu_mapping(struct nouveau_drm *drm)
222{
223 struct nvif_mmu *mmu = &drm->client.mmu;
224 return !(mmu->type[drm->ttm.type_host[0]].type & NVIF_MEM_UNCACHED);
225}
226
220int nouveau_pmops_suspend(struct device *); 227int nouveau_pmops_suspend(struct device *);
221int nouveau_pmops_resume(struct device *); 228int nouveau_pmops_resume(struct device *);
222bool nouveau_pmops_runtime(void); 229bool nouveau_pmops_runtime(void);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index c533d8e04afc..be7357bf2246 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -429,7 +429,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
429 drm_fb_helper_unregister_fbi(&fbcon->helper); 429 drm_fb_helper_unregister_fbi(&fbcon->helper);
430 drm_fb_helper_fini(&fbcon->helper); 430 drm_fb_helper_fini(&fbcon->helper);
431 431
432 if (nouveau_fb->nvbo) { 432 if (nouveau_fb && nouveau_fb->nvbo) {
433 nouveau_vma_del(&nouveau_fb->vma); 433 nouveau_vma_del(&nouveau_fb->vma);
434 nouveau_bo_unmap(nouveau_fb->nvbo); 434 nouveau_bo_unmap(nouveau_fb->nvbo);
435 nouveau_bo_unpin(nouveau_fb->nvbo); 435 nouveau_bo_unpin(nouveau_fb->nvbo);
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 589a9621db76..c002f8968507 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -103,10 +103,10 @@ nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt)
103 u8 type; 103 u8 type;
104 int ret; 104 int ret;
105 105
106 if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) 106 if (!nouveau_drm_use_coherent_gpu_mapping(drm))
107 type = drm->ttm.type_ncoh; 107 type = drm->ttm.type_ncoh[!!mem->kind];
108 else 108 else
109 type = drm->ttm.type_host; 109 type = drm->ttm.type_host[0];
110 110
111 if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND)) 111 if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND))
112 mem->comp = mem->kind = 0; 112 mem->comp = mem->kind = 0;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 08b974b30482..dff51a0ee028 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -235,27 +235,46 @@ nouveau_ttm_global_release(struct nouveau_drm *drm)
235 drm->ttm.mem_global_ref.release = NULL; 235 drm->ttm.mem_global_ref.release = NULL;
236} 236}
237 237
238int 238static int
239nouveau_ttm_init(struct nouveau_drm *drm) 239nouveau_ttm_init_host(struct nouveau_drm *drm, u8 kind)
240{ 240{
241 struct nvkm_device *device = nvxx_device(&drm->client.device);
242 struct nvkm_pci *pci = device->pci;
243 struct nvif_mmu *mmu = &drm->client.mmu; 241 struct nvif_mmu *mmu = &drm->client.mmu;
244 struct drm_device *dev = drm->dev; 242 int typei;
245 int typei, ret;
246 243
247 typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | 244 typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE |
248 NVIF_MEM_COHERENT); 245 kind | NVIF_MEM_COHERENT);
249 if (typei < 0) 246 if (typei < 0)
250 return -ENOSYS; 247 return -ENOSYS;
251 248
252 drm->ttm.type_host = typei; 249 drm->ttm.type_host[!!kind] = typei;
253 250
254 typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE); 251 typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | kind);
255 if (typei < 0) 252 if (typei < 0)
256 return -ENOSYS; 253 return -ENOSYS;
257 254
258 drm->ttm.type_ncoh = typei; 255 drm->ttm.type_ncoh[!!kind] = typei;
256 return 0;
257}
258
259int
260nouveau_ttm_init(struct nouveau_drm *drm)
261{
262 struct nvkm_device *device = nvxx_device(&drm->client.device);
263 struct nvkm_pci *pci = device->pci;
264 struct nvif_mmu *mmu = &drm->client.mmu;
265 struct drm_device *dev = drm->dev;
266 int typei, ret;
267
268 ret = nouveau_ttm_init_host(drm, 0);
269 if (ret)
270 return ret;
271
272 if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
273 drm->client.device.info.chipset != 0x50) {
274 ret = nouveau_ttm_init_host(drm, NVIF_MEM_KIND);
275 if (ret)
276 return ret;
277 }
259 278
260 if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC && 279 if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC &&
261 drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { 280 drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c
index 9e2628dd8e4d..f5371d96b003 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c
@@ -67,8 +67,8 @@ nouveau_vma_del(struct nouveau_vma **pvma)
67 nvif_vmm_put(&vma->vmm->vmm, &tmp); 67 nvif_vmm_put(&vma->vmm->vmm, &tmp);
68 } 68 }
69 list_del(&vma->head); 69 list_del(&vma->head);
70 *pvma = NULL;
71 kfree(*pvma); 70 kfree(*pvma);
71 *pvma = NULL;
72 } 72 }
73} 73}
74 74
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index e14643615698..00eeaaffeae5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2369,7 +2369,7 @@ nv13b_chipset = {
2369 .imem = gk20a_instmem_new, 2369 .imem = gk20a_instmem_new,
2370 .ltc = gp100_ltc_new, 2370 .ltc = gp100_ltc_new,
2371 .mc = gp10b_mc_new, 2371 .mc = gp10b_mc_new,
2372 .mmu = gf100_mmu_new, 2372 .mmu = gp10b_mmu_new,
2373 .secboot = gp10b_secboot_new, 2373 .secboot = gp10b_secboot_new,
2374 .pmu = gm20b_pmu_new, 2374 .pmu = gm20b_pmu_new,
2375 .timer = gk20a_timer_new, 2375 .timer = gk20a_timer_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
index 972370ed36f0..7c7efa4ea0d0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
@@ -36,6 +36,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
36 if (data) { 36 if (data) {
37 *ver = nvbios_rd08(bios, data + 0x00); 37 *ver = nvbios_rd08(bios, data + 0x00);
38 switch (*ver) { 38 switch (*ver) {
39 case 0x20:
39 case 0x21: 40 case 0x21:
40 case 0x30: 41 case 0x30:
41 case 0x40: 42 case 0x40:
@@ -63,6 +64,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx,
63 if (data && idx < *cnt) { 64 if (data && idx < *cnt) {
64 u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len); 65 u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len);
65 switch (*ver * !!outp) { 66 switch (*ver * !!outp) {
67 case 0x20:
66 case 0x21: 68 case 0x21:
67 case 0x30: 69 case 0x30:
68 *hdr = nvbios_rd08(bios, data + 0x04); 70 *hdr = nvbios_rd08(bios, data + 0x04);
@@ -96,12 +98,16 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx,
96 info->type = nvbios_rd16(bios, data + 0x00); 98 info->type = nvbios_rd16(bios, data + 0x00);
97 info->mask = nvbios_rd16(bios, data + 0x02); 99 info->mask = nvbios_rd16(bios, data + 0x02);
98 switch (*ver) { 100 switch (*ver) {
101 case 0x20:
102 info->mask |= 0x00c0; /* match any link */
103 /* fall-through */
99 case 0x21: 104 case 0x21:
100 case 0x30: 105 case 0x30:
101 info->flags = nvbios_rd08(bios, data + 0x05); 106 info->flags = nvbios_rd08(bios, data + 0x05);
102 info->script[0] = nvbios_rd16(bios, data + 0x06); 107 info->script[0] = nvbios_rd16(bios, data + 0x06);
103 info->script[1] = nvbios_rd16(bios, data + 0x08); 108 info->script[1] = nvbios_rd16(bios, data + 0x08);
104 info->lnkcmp = nvbios_rd16(bios, data + 0x0a); 109 if (*len >= 0x0c)
110 info->lnkcmp = nvbios_rd16(bios, data + 0x0a);
105 if (*len >= 0x0f) { 111 if (*len >= 0x0f) {
106 info->script[2] = nvbios_rd16(bios, data + 0x0c); 112 info->script[2] = nvbios_rd16(bios, data + 0x0c);
107 info->script[3] = nvbios_rd16(bios, data + 0x0e); 113 info->script[3] = nvbios_rd16(bios, data + 0x0e);
@@ -170,6 +176,7 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
170 memset(info, 0x00, sizeof(*info)); 176 memset(info, 0x00, sizeof(*info));
171 if (data) { 177 if (data) {
172 switch (*ver) { 178 switch (*ver) {
179 case 0x20:
173 case 0x21: 180 case 0x21:
174 info->dc = nvbios_rd08(bios, data + 0x02); 181 info->dc = nvbios_rd08(bios, data + 0x02);
175 info->pe = nvbios_rd08(bios, data + 0x03); 182 info->pe = nvbios_rd08(bios, data + 0x03);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
index 1ba7289684aa..db48a1daca0c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
@@ -249,7 +249,7 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
249 iobj->base.memory.ptrs = &nv50_instobj_fast; 249 iobj->base.memory.ptrs = &nv50_instobj_fast;
250 else 250 else
251 iobj->base.memory.ptrs = &nv50_instobj_slow; 251 iobj->base.memory.ptrs = &nv50_instobj_slow;
252 refcount_inc(&iobj->maps); 252 refcount_set(&iobj->maps, 1);
253 } 253 }
254 254
255 mutex_unlock(&imem->subdev.mutex); 255 mutex_unlock(&imem->subdev.mutex);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
index b1b1f3626b96..deb96de54b00 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
@@ -136,6 +136,13 @@ nvkm_pci_init(struct nvkm_subdev *subdev)
136 return ret; 136 return ret;
137 137
138 pci->irq = pdev->irq; 138 pci->irq = pdev->irq;
139
140 /* Ensure MSI interrupts are armed, for the case where there are
141 * already interrupts pending (for whatever reason) at load time.
142 */
143 if (pci->msi)
144 pci->func->msi_rearm(pci);
145
139 return ret; 146 return ret;
140} 147}
141 148
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
index dda904ec0534..500b6fb3e028 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
@@ -175,11 +175,31 @@ static void sun4i_hdmi_mode_set(struct drm_encoder *encoder,
175 writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG); 175 writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG);
176} 176}
177 177
178static enum drm_mode_status sun4i_hdmi_mode_valid(struct drm_encoder *encoder,
179 const struct drm_display_mode *mode)
180{
181 struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder);
182 unsigned long rate = mode->clock * 1000;
183 unsigned long diff = rate / 200; /* +-0.5% allowed by HDMI spec */
184 long rounded_rate;
185
186 /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */
187 if (rate > 165000000)
188 return MODE_CLOCK_HIGH;
189 rounded_rate = clk_round_rate(hdmi->tmds_clk, rate);
190 if (rounded_rate > 0 &&
191 max_t(unsigned long, rounded_rate, rate) -
192 min_t(unsigned long, rounded_rate, rate) < diff)
193 return MODE_OK;
194 return MODE_NOCLOCK;
195}
196
178static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = { 197static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = {
179 .atomic_check = sun4i_hdmi_atomic_check, 198 .atomic_check = sun4i_hdmi_atomic_check,
180 .disable = sun4i_hdmi_disable, 199 .disable = sun4i_hdmi_disable,
181 .enable = sun4i_hdmi_enable, 200 .enable = sun4i_hdmi_enable,
182 .mode_set = sun4i_hdmi_mode_set, 201 .mode_set = sun4i_hdmi_mode_set,
202 .mode_valid = sun4i_hdmi_mode_valid,
183}; 203};
184 204
185static const struct drm_encoder_funcs sun4i_hdmi_funcs = { 205static const struct drm_encoder_funcs sun4i_hdmi_funcs = {
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index e122f5b2a395..f4284b51bdca 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -724,12 +724,12 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
724 if (IS_ERR(tcon->crtc)) { 724 if (IS_ERR(tcon->crtc)) {
725 dev_err(dev, "Couldn't create our CRTC\n"); 725 dev_err(dev, "Couldn't create our CRTC\n");
726 ret = PTR_ERR(tcon->crtc); 726 ret = PTR_ERR(tcon->crtc);
727 goto err_free_clocks; 727 goto err_free_dotclock;
728 } 728 }
729 729
730 ret = sun4i_rgb_init(drm, tcon); 730 ret = sun4i_rgb_init(drm, tcon);
731 if (ret < 0) 731 if (ret < 0)
732 goto err_free_clocks; 732 goto err_free_dotclock;
733 733
734 if (tcon->quirks->needs_de_be_mux) { 734 if (tcon->quirks->needs_de_be_mux) {
735 /* 735 /*
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 44343a2bf55c..b5ba6441489f 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -455,6 +455,7 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
455 freed += (nr_free_pool - shrink_pages) << pool->order; 455 freed += (nr_free_pool - shrink_pages) << pool->order;
456 if (freed >= sc->nr_to_scan) 456 if (freed >= sc->nr_to_scan)
457 break; 457 break;
458 shrink_pages <<= pool->order;
458 } 459 }
459 mutex_unlock(&lock); 460 mutex_unlock(&lock);
460 return freed; 461 return freed;
@@ -543,7 +544,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
543 int r = 0; 544 int r = 0;
544 unsigned i, j, cpages; 545 unsigned i, j, cpages;
545 unsigned npages = 1 << order; 546 unsigned npages = 1 << order;
546 unsigned max_cpages = min(count, (unsigned)NUM_PAGES_TO_ALLOC); 547 unsigned max_cpages = min(count << order, (unsigned)NUM_PAGES_TO_ALLOC);
547 548
548 /* allocate array for page caching change */ 549 /* allocate array for page caching change */
549 caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL); 550 caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL);
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 6c32c89a83a9..638540943c61 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -888,8 +888,10 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
888 /* If we got force-completed because of GPU reset rather than 888 /* If we got force-completed because of GPU reset rather than
889 * through our IRQ handler, signal the fence now. 889 * through our IRQ handler, signal the fence now.
890 */ 890 */
891 if (exec->fence) 891 if (exec->fence) {
892 dma_fence_signal(exec->fence); 892 dma_fence_signal(exec->fence);
893 dma_fence_put(exec->fence);
894 }
893 895
894 if (exec->bo) { 896 if (exec->bo) {
895 for (i = 0; i < exec->bo_count; i++) { 897 for (i = 0; i < exec->bo_count; i++) {
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
index 61b2e5377993..26eddbb62893 100644
--- a/drivers/gpu/drm/vc4/vc4_irq.c
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -139,6 +139,7 @@ vc4_irq_finish_render_job(struct drm_device *dev)
139 list_move_tail(&exec->head, &vc4->job_done_list); 139 list_move_tail(&exec->head, &vc4->job_done_list);
140 if (exec->fence) { 140 if (exec->fence) {
141 dma_fence_signal_locked(exec->fence); 141 dma_fence_signal_locked(exec->fence);
142 dma_fence_put(exec->fence);
142 exec->fence = NULL; 143 exec->fence = NULL;
143 } 144 }
144 vc4_submit_next_render_job(dev); 145 vc4_submit_next_render_job(dev);
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index f3fcb836a1f9..0c3f608131cf 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -551,7 +551,7 @@ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item)
551 ret = hid_add_field(parser, HID_FEATURE_REPORT, data); 551 ret = hid_add_field(parser, HID_FEATURE_REPORT, data);
552 break; 552 break;
553 default: 553 default:
554 hid_err(parser->device, "unknown main item tag 0x%x\n", item->tag); 554 hid_warn(parser->device, "unknown main item tag 0x%x\n", item->tag);
555 ret = 0; 555 ret = 0;
556 } 556 }
557 557
diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c
index 68cdc962265b..271f31461da4 100644
--- a/drivers/hid/hid-cp2112.c
+++ b/drivers/hid/hid-cp2112.c
@@ -696,8 +696,16 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr,
696 (u8 *)&word, 2); 696 (u8 *)&word, 2);
697 break; 697 break;
698 case I2C_SMBUS_I2C_BLOCK_DATA: 698 case I2C_SMBUS_I2C_BLOCK_DATA:
699 size = I2C_SMBUS_BLOCK_DATA; 699 if (read_write == I2C_SMBUS_READ) {
700 /* fallthrough */ 700 read_length = data->block[0];
701 count = cp2112_write_read_req(buf, addr, read_length,
702 command, NULL, 0);
703 } else {
704 count = cp2112_write_req(buf, addr, command,
705 data->block + 1,
706 data->block[0]);
707 }
708 break;
701 case I2C_SMBUS_BLOCK_DATA: 709 case I2C_SMBUS_BLOCK_DATA:
702 if (I2C_SMBUS_READ == read_write) { 710 if (I2C_SMBUS_READ == read_write) {
703 count = cp2112_write_read_req(buf, addr, 711 count = cp2112_write_read_req(buf, addr,
@@ -785,6 +793,9 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr,
785 case I2C_SMBUS_WORD_DATA: 793 case I2C_SMBUS_WORD_DATA:
786 data->word = le16_to_cpup((__le16 *)buf); 794 data->word = le16_to_cpup((__le16 *)buf);
787 break; 795 break;
796 case I2C_SMBUS_I2C_BLOCK_DATA:
797 memcpy(data->block + 1, buf, read_length);
798 break;
788 case I2C_SMBUS_BLOCK_DATA: 799 case I2C_SMBUS_BLOCK_DATA:
789 if (read_length > I2C_SMBUS_BLOCK_MAX) { 800 if (read_length > I2C_SMBUS_BLOCK_MAX) {
790 ret = -EPROTO; 801 ret = -EPROTO;
diff --git a/drivers/hid/hid-holtekff.c b/drivers/hid/hid-holtekff.c
index 9325545fc3ae..edc0f64bb584 100644
--- a/drivers/hid/hid-holtekff.c
+++ b/drivers/hid/hid-holtekff.c
@@ -32,10 +32,6 @@
32 32
33#ifdef CONFIG_HOLTEK_FF 33#ifdef CONFIG_HOLTEK_FF
34 34
35MODULE_LICENSE("GPL");
36MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>");
37MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices");
38
39/* 35/*
40 * These commands and parameters are currently known: 36 * These commands and parameters are currently known:
41 * 37 *
@@ -223,3 +219,7 @@ static struct hid_driver holtek_driver = {
223 .probe = holtek_probe, 219 .probe = holtek_probe,
224}; 220};
225module_hid_driver(holtek_driver); 221module_hid_driver(holtek_driver);
222
223MODULE_LICENSE("GPL");
224MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>");
225MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices");
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 76ed9a216f10..610223f0e945 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1378,6 +1378,8 @@ void vmbus_device_unregister(struct hv_device *device_obj)
1378 pr_debug("child device %s unregistered\n", 1378 pr_debug("child device %s unregistered\n",
1379 dev_name(&device_obj->device)); 1379 dev_name(&device_obj->device));
1380 1380
1381 kset_unregister(device_obj->channels_kset);
1382
1381 /* 1383 /*
1382 * Kick off the process of unregistering the device. 1384 * Kick off the process of unregistering the device.
1383 * This will call vmbus_remove() and eventually vmbus_device_release() 1385 * This will call vmbus_remove() and eventually vmbus_device_release()
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index c9790e2c3440..af5123042990 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -143,6 +143,7 @@ static int hwmon_thermal_add_sensor(struct device *dev,
143 struct hwmon_device *hwdev, int index) 143 struct hwmon_device *hwdev, int index)
144{ 144{
145 struct hwmon_thermal_data *tdata; 145 struct hwmon_thermal_data *tdata;
146 struct thermal_zone_device *tzd;
146 147
147 tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL); 148 tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL);
148 if (!tdata) 149 if (!tdata)
@@ -151,8 +152,14 @@ static int hwmon_thermal_add_sensor(struct device *dev,
151 tdata->hwdev = hwdev; 152 tdata->hwdev = hwdev;
152 tdata->index = index; 153 tdata->index = index;
153 154
154 devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, 155 tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata,
155 &hwmon_thermal_ops); 156 &hwmon_thermal_ops);
157 /*
158 * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV,
159 * so ignore that error but forward any other error.
160 */
161 if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV))
162 return PTR_ERR(tzd);
156 163
157 return 0; 164 return 0;
158} 165}
@@ -621,14 +628,20 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
621 if (!chip->ops->is_visible(drvdata, hwmon_temp, 628 if (!chip->ops->is_visible(drvdata, hwmon_temp,
622 hwmon_temp_input, j)) 629 hwmon_temp_input, j))
623 continue; 630 continue;
624 if (info[i]->config[j] & HWMON_T_INPUT) 631 if (info[i]->config[j] & HWMON_T_INPUT) {
625 hwmon_thermal_add_sensor(dev, hwdev, j); 632 err = hwmon_thermal_add_sensor(dev,
633 hwdev, j);
634 if (err)
635 goto free_device;
636 }
626 } 637 }
627 } 638 }
628 } 639 }
629 640
630 return hdev; 641 return hdev;
631 642
643free_device:
644 device_unregister(hdev);
632free_hwmon: 645free_hwmon:
633 kfree(hwdev); 646 kfree(hwdev);
634ida_remove: 647ida_remove:
diff --git a/drivers/hwtracing/stm/ftrace.c b/drivers/hwtracing/stm/ftrace.c
index bd126a7c6da2..7da75644c750 100644
--- a/drivers/hwtracing/stm/ftrace.c
+++ b/drivers/hwtracing/stm/ftrace.c
@@ -42,9 +42,11 @@ static struct stm_ftrace {
42 * @len: length of the data packet 42 * @len: length of the data packet
43 */ 43 */
44static void notrace 44static void notrace
45stm_ftrace_write(const void *buf, unsigned int len) 45stm_ftrace_write(struct trace_export *export, const void *buf, unsigned int len)
46{ 46{
47 stm_source_write(&stm_ftrace.data, STM_FTRACE_CHAN, buf, len); 47 struct stm_ftrace *stm = container_of(export, struct stm_ftrace, ftrace);
48
49 stm_source_write(&stm->data, STM_FTRACE_CHAN, buf, len);
48} 50}
49 51
50static int stm_ftrace_link(struct stm_source_data *data) 52static int stm_ftrace_link(struct stm_source_data *data)
diff --git a/drivers/i2c/busses/i2c-cht-wc.c b/drivers/i2c/busses/i2c-cht-wc.c
index 0d05dadb2dc5..44cffad43701 100644
--- a/drivers/i2c/busses/i2c-cht-wc.c
+++ b/drivers/i2c/busses/i2c-cht-wc.c
@@ -379,7 +379,7 @@ static int cht_wc_i2c_adap_i2c_remove(struct platform_device *pdev)
379 return 0; 379 return 0;
380} 380}
381 381
382static struct platform_device_id cht_wc_i2c_adap_id_table[] = { 382static const struct platform_device_id cht_wc_i2c_adap_id_table[] = {
383 { .name = "cht_wcove_ext_chgr" }, 383 { .name = "cht_wcove_ext_chgr" },
384 {}, 384 {},
385}; 385};
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index 174579d32e5f..462948e2c535 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -983,7 +983,7 @@ static void piix4_adap_remove(struct i2c_adapter *adap)
983 983
984 if (adapdata->smba) { 984 if (adapdata->smba) {
985 i2c_del_adapter(adap); 985 i2c_del_adapter(adap);
986 if (adapdata->port == (0 << 1)) { 986 if (adapdata->port == (0 << piix4_port_shift_sb800)) {
987 release_region(adapdata->smba, SMBIOSIZE); 987 release_region(adapdata->smba, SMBIOSIZE);
988 if (adapdata->sb800_main) 988 if (adapdata->sb800_main)
989 release_region(SB800_PIIX4_SMB_IDX, 2); 989 release_region(SB800_PIIX4_SMB_IDX, 2);
diff --git a/drivers/i2c/busses/i2c-stm32.h b/drivers/i2c/busses/i2c-stm32.h
index dab51761f8c5..d4f9cef251ac 100644
--- a/drivers/i2c/busses/i2c-stm32.h
+++ b/drivers/i2c/busses/i2c-stm32.h
@@ -1,10 +1,11 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * i2c-stm32.h 3 * i2c-stm32.h
3 * 4 *
4 * Copyright (C) M'boumba Cedric Madianga 2017 5 * Copyright (C) M'boumba Cedric Madianga 2017
6 * Copyright (C) STMicroelectronics 2017
5 * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com> 7 * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
6 * 8 *
7 * License terms: GNU General Public License (GPL), version 2
8 */ 9 */
9 10
10#ifndef _I2C_STM32_H 11#ifndef _I2C_STM32_H
diff --git a/drivers/i2c/busses/i2c-stm32f4.c b/drivers/i2c/busses/i2c-stm32f4.c
index 4ec108496f15..47c8d00de53f 100644
--- a/drivers/i2c/busses/i2c-stm32f4.c
+++ b/drivers/i2c/busses/i2c-stm32f4.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Driver for STMicroelectronics STM32 I2C controller 3 * Driver for STMicroelectronics STM32 I2C controller
3 * 4 *
@@ -6,11 +7,11 @@
6 * http://www.st.com/resource/en/reference_manual/DM00031020.pdf 7 * http://www.st.com/resource/en/reference_manual/DM00031020.pdf
7 * 8 *
8 * Copyright (C) M'boumba Cedric Madianga 2016 9 * Copyright (C) M'boumba Cedric Madianga 2016
10 * Copyright (C) STMicroelectronics 2017
9 * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com> 11 * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
10 * 12 *
11 * This driver is based on i2c-st.c 13 * This driver is based on i2c-st.c
12 * 14 *
13 * License terms: GNU General Public License (GPL), version 2
14 */ 15 */
15 16
16#include <linux/clk.h> 17#include <linux/clk.h>
diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c
index d4a6e9c2e9aa..b445b3bb0bb1 100644
--- a/drivers/i2c/busses/i2c-stm32f7.c
+++ b/drivers/i2c/busses/i2c-stm32f7.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Driver for STMicroelectronics STM32F7 I2C controller 3 * Driver for STMicroelectronics STM32F7 I2C controller
3 * 4 *
@@ -7,11 +8,11 @@
7 * http://www.st.com/resource/en/reference_manual/dm00124865.pdf 8 * http://www.st.com/resource/en/reference_manual/dm00124865.pdf
8 * 9 *
9 * Copyright (C) M'boumba Cedric Madianga 2017 10 * Copyright (C) M'boumba Cedric Madianga 2017
11 * Copyright (C) STMicroelectronics 2017
10 * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com> 12 * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
11 * 13 *
12 * This driver is based on i2c-stm32f4.c 14 * This driver is based on i2c-stm32f4.c
13 * 15 *
14 * License terms: GNU General Public License (GPL), version 2
15 */ 16 */
16#include <linux/clk.h> 17#include <linux/clk.h>
17#include <linux/delay.h> 18#include <linux/delay.h>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index f6983357145d..6294a7001d33 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -4458,7 +4458,7 @@ out:
4458 return skb->len; 4458 return skb->len;
4459} 4459}
4460 4460
4461static const struct rdma_nl_cbs cma_cb_table[] = { 4461static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = {
4462 [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats}, 4462 [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
4463}; 4463};
4464 4464
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 5e1be4949d5f..30914f3baa5f 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1146,7 +1146,7 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
1146} 1146}
1147EXPORT_SYMBOL(ib_get_net_dev_by_params); 1147EXPORT_SYMBOL(ib_get_net_dev_by_params);
1148 1148
1149static const struct rdma_nl_cbs ibnl_ls_cb_table[] = { 1149static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
1150 [RDMA_NL_LS_OP_RESOLVE] = { 1150 [RDMA_NL_LS_OP_RESOLVE] = {
1151 .doit = ib_nl_handle_resolve_resp, 1151 .doit = ib_nl_handle_resolve_resp,
1152 .flags = RDMA_NL_ADMIN_PERM, 1152 .flags = RDMA_NL_ADMIN_PERM,
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index e9e189ec7502..5d676cff41f4 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -80,7 +80,7 @@ const char *__attribute_const__ iwcm_reject_msg(int reason)
80} 80}
81EXPORT_SYMBOL(iwcm_reject_msg); 81EXPORT_SYMBOL(iwcm_reject_msg);
82 82
83static struct rdma_nl_cbs iwcm_nl_cb_table[] = { 83static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = {
84 [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, 84 [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
85 [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, 85 [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
86 [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, 86 [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 2fae850a3eff..9a05245a1acf 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -303,7 +303,7 @@ out: cb->args[0] = idx;
303 return skb->len; 303 return skb->len;
304} 304}
305 305
306static const struct rdma_nl_cbs nldev_cb_table[] = { 306static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
307 [RDMA_NLDEV_CMD_GET] = { 307 [RDMA_NLDEV_CMD_GET] = {
308 .doit = nldev_get_doit, 308 .doit = nldev_get_doit,
309 .dump = nldev_get_dumpit, 309 .dump = nldev_get_dumpit,
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index a337386652b0..59b2f96d986a 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -386,6 +386,9 @@ int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev)
386 if (ret) 386 if (ret)
387 return ret; 387 return ret;
388 388
389 if (!qp->qp_sec)
390 return 0;
391
389 mutex_lock(&real_qp->qp_sec->mutex); 392 mutex_lock(&real_qp->qp_sec->mutex);
390 ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys, 393 ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys,
391 qp->qp_sec); 394 qp->qp_sec);
@@ -739,8 +742,11 @@ int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
739 if (!rdma_protocol_ib(map->agent.device, map->agent.port_num)) 742 if (!rdma_protocol_ib(map->agent.device, map->agent.port_num))
740 return 0; 743 return 0;
741 744
742 if (map->agent.qp->qp_type == IB_QPT_SMI && !map->agent.smp_allowed) 745 if (map->agent.qp->qp_type == IB_QPT_SMI) {
743 return -EACCES; 746 if (!map->agent.smp_allowed)
747 return -EACCES;
748 return 0;
749 }
744 750
745 return ib_security_pkey_access(map->agent.device, 751 return ib_security_pkey_access(map->agent.device,
746 map->agent.port_num, 752 map->agent.port_num,
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 16d55710b116..840b24096690 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1971,6 +1971,12 @@ static int modify_qp(struct ib_uverbs_file *file,
1971 goto release_qp; 1971 goto release_qp;
1972 } 1972 }
1973 1973
1974 if ((cmd->base.attr_mask & IB_QP_ALT_PATH) &&
1975 !rdma_is_port_valid(qp->device, cmd->base.alt_port_num)) {
1976 ret = -EINVAL;
1977 goto release_qp;
1978 }
1979
1974 attr->qp_state = cmd->base.qp_state; 1980 attr->qp_state = cmd->base.qp_state;
1975 attr->cur_qp_state = cmd->base.cur_qp_state; 1981 attr->cur_qp_state = cmd->base.cur_qp_state;
1976 attr->path_mtu = cmd->base.path_mtu; 1982 attr->path_mtu = cmd->base.path_mtu;
@@ -2068,8 +2074,8 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
2068 return -EOPNOTSUPP; 2074 return -EOPNOTSUPP;
2069 2075
2070 if (ucore->inlen > sizeof(cmd)) { 2076 if (ucore->inlen > sizeof(cmd)) {
2071 if (ib_is_udata_cleared(ucore, sizeof(cmd), 2077 if (!ib_is_udata_cleared(ucore, sizeof(cmd),
2072 ucore->inlen - sizeof(cmd))) 2078 ucore->inlen - sizeof(cmd)))
2073 return -EOPNOTSUPP; 2079 return -EOPNOTSUPP;
2074 } 2080 }
2075 2081
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 3fb8fb6cc824..e36d27ed4daa 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1438,7 +1438,8 @@ int ib_close_qp(struct ib_qp *qp)
1438 spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); 1438 spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
1439 1439
1440 atomic_dec(&real_qp->usecnt); 1440 atomic_dec(&real_qp->usecnt);
1441 ib_close_shared_qp_security(qp->qp_sec); 1441 if (qp->qp_sec)
1442 ib_close_shared_qp_security(qp->qp_sec);
1442 kfree(qp); 1443 kfree(qp);
1443 1444
1444 return 0; 1445 return 0;
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index ea55e95cd2c5..6f2b26126c64 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -395,6 +395,11 @@ next_cqe:
395 395
396static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) 396static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
397{ 397{
398 if (DRAIN_CQE(cqe)) {
399 WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
400 return 0;
401 }
402
398 if (CQE_OPCODE(cqe) == FW_RI_TERMINATE) 403 if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
399 return 0; 404 return 0;
400 405
@@ -489,7 +494,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
489 /* 494 /*
490 * Special cqe for drain WR completions... 495 * Special cqe for drain WR completions...
491 */ 496 */
492 if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) { 497 if (DRAIN_CQE(hw_cqe)) {
493 *cookie = CQE_DRAIN_COOKIE(hw_cqe); 498 *cookie = CQE_DRAIN_COOKIE(hw_cqe);
494 *cqe = *hw_cqe; 499 *cqe = *hw_cqe;
495 goto skip_cqe; 500 goto skip_cqe;
@@ -566,10 +571,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
566 ret = -EAGAIN; 571 ret = -EAGAIN;
567 goto skip_cqe; 572 goto skip_cqe;
568 } 573 }
569 if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) { 574 if (unlikely(!CQE_STATUS(hw_cqe) &&
575 CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
570 t4_set_wq_in_error(wq); 576 t4_set_wq_in_error(wq);
571 hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN)); 577 hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
572 goto proc_cqe;
573 } 578 }
574 goto proc_cqe; 579 goto proc_cqe;
575 } 580 }
@@ -743,9 +748,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
743 c4iw_invalidate_mr(qhp->rhp, 748 c4iw_invalidate_mr(qhp->rhp,
744 CQE_WRID_FR_STAG(&cqe)); 749 CQE_WRID_FR_STAG(&cqe));
745 break; 750 break;
746 case C4IW_DRAIN_OPCODE:
747 wc->opcode = IB_WC_SEND;
748 break;
749 default: 751 default:
750 pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", 752 pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
751 CQE_OPCODE(&cqe), CQE_QPID(&cqe)); 753 CQE_OPCODE(&cqe), CQE_QPID(&cqe));
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 470f97a79ebb..65dd3726ca02 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -693,8 +693,6 @@ static inline int to_ib_qp_state(int c4iw_qp_state)
693 return IB_QPS_ERR; 693 return IB_QPS_ERR;
694} 694}
695 695
696#define C4IW_DRAIN_OPCODE FW_RI_SGE_EC_CR_RETURN
697
698static inline u32 c4iw_ib_to_tpt_access(int a) 696static inline u32 c4iw_ib_to_tpt_access(int a)
699{ 697{
700 return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | 698 return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 5ee7fe433136..d5c92fc520d6 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -790,21 +790,57 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
790 return 0; 790 return 0;
791} 791}
792 792
793static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) 793static int ib_to_fw_opcode(int ib_opcode)
794{
795 int opcode;
796
797 switch (ib_opcode) {
798 case IB_WR_SEND_WITH_INV:
799 opcode = FW_RI_SEND_WITH_INV;
800 break;
801 case IB_WR_SEND:
802 opcode = FW_RI_SEND;
803 break;
804 case IB_WR_RDMA_WRITE:
805 opcode = FW_RI_RDMA_WRITE;
806 break;
807 case IB_WR_RDMA_READ:
808 case IB_WR_RDMA_READ_WITH_INV:
809 opcode = FW_RI_READ_REQ;
810 break;
811 case IB_WR_REG_MR:
812 opcode = FW_RI_FAST_REGISTER;
813 break;
814 case IB_WR_LOCAL_INV:
815 opcode = FW_RI_LOCAL_INV;
816 break;
817 default:
818 opcode = -EINVAL;
819 }
820 return opcode;
821}
822
823static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
794{ 824{
795 struct t4_cqe cqe = {}; 825 struct t4_cqe cqe = {};
796 struct c4iw_cq *schp; 826 struct c4iw_cq *schp;
797 unsigned long flag; 827 unsigned long flag;
798 struct t4_cq *cq; 828 struct t4_cq *cq;
829 int opcode;
799 830
800 schp = to_c4iw_cq(qhp->ibqp.send_cq); 831 schp = to_c4iw_cq(qhp->ibqp.send_cq);
801 cq = &schp->cq; 832 cq = &schp->cq;
802 833
834 opcode = ib_to_fw_opcode(wr->opcode);
835 if (opcode < 0)
836 return opcode;
837
803 cqe.u.drain_cookie = wr->wr_id; 838 cqe.u.drain_cookie = wr->wr_id;
804 cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | 839 cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
805 CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | 840 CQE_OPCODE_V(opcode) |
806 CQE_TYPE_V(1) | 841 CQE_TYPE_V(1) |
807 CQE_SWCQE_V(1) | 842 CQE_SWCQE_V(1) |
843 CQE_DRAIN_V(1) |
808 CQE_QPID_V(qhp->wq.sq.qid)); 844 CQE_QPID_V(qhp->wq.sq.qid));
809 845
810 spin_lock_irqsave(&schp->lock, flag); 846 spin_lock_irqsave(&schp->lock, flag);
@@ -819,6 +855,23 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
819 schp->ibcq.cq_context); 855 schp->ibcq.cq_context);
820 spin_unlock_irqrestore(&schp->comp_handler_lock, flag); 856 spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
821 } 857 }
858 return 0;
859}
860
861static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
862 struct ib_send_wr **bad_wr)
863{
864 int ret = 0;
865
866 while (wr) {
867 ret = complete_sq_drain_wr(qhp, wr);
868 if (ret) {
869 *bad_wr = wr;
870 break;
871 }
872 wr = wr->next;
873 }
874 return ret;
822} 875}
823 876
824static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) 877static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
@@ -833,9 +886,10 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
833 886
834 cqe.u.drain_cookie = wr->wr_id; 887 cqe.u.drain_cookie = wr->wr_id;
835 cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | 888 cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
836 CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | 889 CQE_OPCODE_V(FW_RI_SEND) |
837 CQE_TYPE_V(0) | 890 CQE_TYPE_V(0) |
838 CQE_SWCQE_V(1) | 891 CQE_SWCQE_V(1) |
892 CQE_DRAIN_V(1) |
839 CQE_QPID_V(qhp->wq.sq.qid)); 893 CQE_QPID_V(qhp->wq.sq.qid));
840 894
841 spin_lock_irqsave(&rchp->lock, flag); 895 spin_lock_irqsave(&rchp->lock, flag);
@@ -852,6 +906,14 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
852 } 906 }
853} 907}
854 908
909static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
910{
911 while (wr) {
912 complete_rq_drain_wr(qhp, wr);
913 wr = wr->next;
914 }
915}
916
855int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 917int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
856 struct ib_send_wr **bad_wr) 918 struct ib_send_wr **bad_wr)
857{ 919{
@@ -868,9 +930,14 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
868 930
869 qhp = to_c4iw_qp(ibqp); 931 qhp = to_c4iw_qp(ibqp);
870 spin_lock_irqsave(&qhp->lock, flag); 932 spin_lock_irqsave(&qhp->lock, flag);
871 if (t4_wq_in_error(&qhp->wq)) { 933
934 /*
935 * If the qp has been flushed, then just insert a special
936 * drain cqe.
937 */
938 if (qhp->wq.flushed) {
872 spin_unlock_irqrestore(&qhp->lock, flag); 939 spin_unlock_irqrestore(&qhp->lock, flag);
873 complete_sq_drain_wr(qhp, wr); 940 err = complete_sq_drain_wrs(qhp, wr, bad_wr);
874 return err; 941 return err;
875 } 942 }
876 num_wrs = t4_sq_avail(&qhp->wq); 943 num_wrs = t4_sq_avail(&qhp->wq);
@@ -1011,9 +1078,14 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1011 1078
1012 qhp = to_c4iw_qp(ibqp); 1079 qhp = to_c4iw_qp(ibqp);
1013 spin_lock_irqsave(&qhp->lock, flag); 1080 spin_lock_irqsave(&qhp->lock, flag);
1014 if (t4_wq_in_error(&qhp->wq)) { 1081
1082 /*
1083 * If the qp has been flushed, then just insert a special
1084 * drain cqe.
1085 */
1086 if (qhp->wq.flushed) {
1015 spin_unlock_irqrestore(&qhp->lock, flag); 1087 spin_unlock_irqrestore(&qhp->lock, flag);
1016 complete_rq_drain_wr(qhp, wr); 1088 complete_rq_drain_wrs(qhp, wr);
1017 return err; 1089 return err;
1018 } 1090 }
1019 num_wrs = t4_rq_avail(&qhp->wq); 1091 num_wrs = t4_rq_avail(&qhp->wq);
@@ -1285,21 +1357,21 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
1285 spin_unlock_irqrestore(&rchp->lock, flag); 1357 spin_unlock_irqrestore(&rchp->lock, flag);
1286 1358
1287 if (schp == rchp) { 1359 if (schp == rchp) {
1288 if (t4_clear_cq_armed(&rchp->cq) && 1360 if ((rq_flushed || sq_flushed) &&
1289 (rq_flushed || sq_flushed)) { 1361 t4_clear_cq_armed(&rchp->cq)) {
1290 spin_lock_irqsave(&rchp->comp_handler_lock, flag); 1362 spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1291 (*rchp->ibcq.comp_handler)(&rchp->ibcq, 1363 (*rchp->ibcq.comp_handler)(&rchp->ibcq,
1292 rchp->ibcq.cq_context); 1364 rchp->ibcq.cq_context);
1293 spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); 1365 spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1294 } 1366 }
1295 } else { 1367 } else {
1296 if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) { 1368 if (rq_flushed && t4_clear_cq_armed(&rchp->cq)) {
1297 spin_lock_irqsave(&rchp->comp_handler_lock, flag); 1369 spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1298 (*rchp->ibcq.comp_handler)(&rchp->ibcq, 1370 (*rchp->ibcq.comp_handler)(&rchp->ibcq,
1299 rchp->ibcq.cq_context); 1371 rchp->ibcq.cq_context);
1300 spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); 1372 spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1301 } 1373 }
1302 if (t4_clear_cq_armed(&schp->cq) && sq_flushed) { 1374 if (sq_flushed && t4_clear_cq_armed(&schp->cq)) {
1303 spin_lock_irqsave(&schp->comp_handler_lock, flag); 1375 spin_lock_irqsave(&schp->comp_handler_lock, flag);
1304 (*schp->ibcq.comp_handler)(&schp->ibcq, 1376 (*schp->ibcq.comp_handler)(&schp->ibcq,
1305 schp->ibcq.cq_context); 1377 schp->ibcq.cq_context);
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index e9ea94268d51..79e8ee12c391 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -197,6 +197,11 @@ struct t4_cqe {
197#define CQE_SWCQE_G(x) ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M) 197#define CQE_SWCQE_G(x) ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M)
198#define CQE_SWCQE_V(x) ((x)<<CQE_SWCQE_S) 198#define CQE_SWCQE_V(x) ((x)<<CQE_SWCQE_S)
199 199
200#define CQE_DRAIN_S 10
201#define CQE_DRAIN_M 0x1
202#define CQE_DRAIN_G(x) ((((x) >> CQE_DRAIN_S)) & CQE_DRAIN_M)
203#define CQE_DRAIN_V(x) ((x)<<CQE_DRAIN_S)
204
200#define CQE_STATUS_S 5 205#define CQE_STATUS_S 5
201#define CQE_STATUS_M 0x1F 206#define CQE_STATUS_M 0x1F
202#define CQE_STATUS_G(x) ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M) 207#define CQE_STATUS_G(x) ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M)
@@ -213,6 +218,7 @@ struct t4_cqe {
213#define CQE_OPCODE_V(x) ((x)<<CQE_OPCODE_S) 218#define CQE_OPCODE_V(x) ((x)<<CQE_OPCODE_S)
214 219
215#define SW_CQE(x) (CQE_SWCQE_G(be32_to_cpu((x)->header))) 220#define SW_CQE(x) (CQE_SWCQE_G(be32_to_cpu((x)->header)))
221#define DRAIN_CQE(x) (CQE_DRAIN_G(be32_to_cpu((x)->header)))
216#define CQE_QPID(x) (CQE_QPID_G(be32_to_cpu((x)->header))) 222#define CQE_QPID(x) (CQE_QPID_G(be32_to_cpu((x)->header)))
217#define CQE_TYPE(x) (CQE_TYPE_G(be32_to_cpu((x)->header))) 223#define CQE_TYPE(x) (CQE_TYPE_G(be32_to_cpu((x)->header)))
218#define SQ_TYPE(x) (CQE_TYPE((x))) 224#define SQ_TYPE(x) (CQE_TYPE((x)))
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 4a9b4d7efe63..8ce9118d4a7f 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1131,7 +1131,6 @@ struct hfi1_devdata {
1131 u16 pcie_lnkctl; 1131 u16 pcie_lnkctl;
1132 u16 pcie_devctl2; 1132 u16 pcie_devctl2;
1133 u32 pci_msix0; 1133 u32 pci_msix0;
1134 u32 pci_lnkctl3;
1135 u32 pci_tph2; 1134 u32 pci_tph2;
1136 1135
1137 /* 1136 /*
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 09e50fd2a08f..8c7e7a60b715 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -411,15 +411,12 @@ int restore_pci_variables(struct hfi1_devdata *dd)
411 if (ret) 411 if (ret)
412 goto error; 412 goto error;
413 413
414 ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, 414 if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
415 dd->pci_lnkctl3); 415 ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2,
416 if (ret) 416 dd->pci_tph2);
417 goto error; 417 if (ret)
418 418 goto error;
419 ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2); 419 }
420 if (ret)
421 goto error;
422
423 return 0; 420 return 0;
424 421
425error: 422error:
@@ -469,15 +466,12 @@ int save_pci_variables(struct hfi1_devdata *dd)
469 if (ret) 466 if (ret)
470 goto error; 467 goto error;
471 468
472 ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, 469 if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
473 &dd->pci_lnkctl3); 470 ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2,
474 if (ret) 471 &dd->pci_tph2);
475 goto error; 472 if (ret)
476 473 goto error;
477 ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); 474 }
478 if (ret)
479 goto error;
480
481 return 0; 475 return 0;
482 476
483error: 477error:
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 013049bcdb53..caf490ab24c8 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -666,6 +666,19 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
666 return (-EOPNOTSUPP); 666 return (-EOPNOTSUPP);
667 } 667 }
668 668
669 if (ucmd->rx_hash_fields_mask & ~(MLX4_IB_RX_HASH_SRC_IPV4 |
670 MLX4_IB_RX_HASH_DST_IPV4 |
671 MLX4_IB_RX_HASH_SRC_IPV6 |
672 MLX4_IB_RX_HASH_DST_IPV6 |
673 MLX4_IB_RX_HASH_SRC_PORT_TCP |
674 MLX4_IB_RX_HASH_DST_PORT_TCP |
675 MLX4_IB_RX_HASH_SRC_PORT_UDP |
676 MLX4_IB_RX_HASH_DST_PORT_UDP)) {
677 pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n",
678 ucmd->rx_hash_fields_mask);
679 return (-EOPNOTSUPP);
680 }
681
669 if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) && 682 if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) &&
670 (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) { 683 (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) {
671 rss_ctx->flags = MLX4_RSS_IPV4; 684 rss_ctx->flags = MLX4_RSS_IPV4;
@@ -691,11 +704,11 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
691 return (-EOPNOTSUPP); 704 return (-EOPNOTSUPP);
692 } 705 }
693 706
694 if (rss_ctx->flags & MLX4_RSS_IPV4) { 707 if (rss_ctx->flags & MLX4_RSS_IPV4)
695 rss_ctx->flags |= MLX4_RSS_UDP_IPV4; 708 rss_ctx->flags |= MLX4_RSS_UDP_IPV4;
696 } else if (rss_ctx->flags & MLX4_RSS_IPV6) { 709 if (rss_ctx->flags & MLX4_RSS_IPV6)
697 rss_ctx->flags |= MLX4_RSS_UDP_IPV6; 710 rss_ctx->flags |= MLX4_RSS_UDP_IPV6;
698 } else { 711 if (!(rss_ctx->flags & (MLX4_RSS_IPV6 | MLX4_RSS_IPV4))) {
699 pr_debug("RX Hash fields_mask is not supported - UDP must be set with IPv4 or IPv6\n"); 712 pr_debug("RX Hash fields_mask is not supported - UDP must be set with IPv4 or IPv6\n");
700 return (-EOPNOTSUPP); 713 return (-EOPNOTSUPP);
701 } 714 }
@@ -707,15 +720,14 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
707 720
708 if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) && 721 if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) &&
709 (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) { 722 (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) {
710 if (rss_ctx->flags & MLX4_RSS_IPV4) { 723 if (rss_ctx->flags & MLX4_RSS_IPV4)
711 rss_ctx->flags |= MLX4_RSS_TCP_IPV4; 724 rss_ctx->flags |= MLX4_RSS_TCP_IPV4;
712 } else if (rss_ctx->flags & MLX4_RSS_IPV6) { 725 if (rss_ctx->flags & MLX4_RSS_IPV6)
713 rss_ctx->flags |= MLX4_RSS_TCP_IPV6; 726 rss_ctx->flags |= MLX4_RSS_TCP_IPV6;
714 } else { 727 if (!(rss_ctx->flags & (MLX4_RSS_IPV6 | MLX4_RSS_IPV4))) {
715 pr_debug("RX Hash fields_mask is not supported - TCP must be set with IPv4 or IPv6\n"); 728 pr_debug("RX Hash fields_mask is not supported - TCP must be set with IPv4 or IPv6\n");
716 return (-EOPNOTSUPP); 729 return (-EOPNOTSUPP);
717 } 730 }
718
719 } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) || 731 } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) ||
720 (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) { 732 (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) {
721 pr_debug("RX Hash fields_mask is not supported - both TCP SRC and DST must be set\n"); 733 pr_debug("RX Hash fields_mask is not supported - both TCP SRC and DST must be set\n");
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 470995fa38d2..6f6712f87a73 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -47,17 +47,6 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
47 return err; 47 return err;
48} 48}
49 49
50int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
51 bool reset, void *out, int out_size)
52{
53 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
54
55 MLX5_SET(query_cong_statistics_in, in, opcode,
56 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
57 MLX5_SET(query_cong_statistics_in, in, clear, reset);
58 return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
59}
60
61int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, 50int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
62 void *out, int out_size) 51 void *out, int out_size)
63{ 52{
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index af4c24596274..78ffded7cc2c 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -37,8 +37,6 @@
37#include <linux/mlx5/driver.h> 37#include <linux/mlx5/driver.h>
38 38
39int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); 39int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
40int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
41 bool reset, void *out, int out_size);
42int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, 40int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
43 void *out, int out_size); 41 void *out, int out_size);
44int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, 42int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 543d0a4c8bf3..8ac50de2b242 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1463,6 +1463,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1463 } 1463 }
1464 1464
1465 INIT_LIST_HEAD(&context->vma_private_list); 1465 INIT_LIST_HEAD(&context->vma_private_list);
1466 mutex_init(&context->vma_private_list_mutex);
1466 INIT_LIST_HEAD(&context->db_page_list); 1467 INIT_LIST_HEAD(&context->db_page_list);
1467 mutex_init(&context->db_page_mutex); 1468 mutex_init(&context->db_page_mutex);
1468 1469
@@ -1624,7 +1625,9 @@ static void mlx5_ib_vma_close(struct vm_area_struct *area)
1624 * mlx5_ib_disassociate_ucontext(). 1625 * mlx5_ib_disassociate_ucontext().
1625 */ 1626 */
1626 mlx5_ib_vma_priv_data->vma = NULL; 1627 mlx5_ib_vma_priv_data->vma = NULL;
1628 mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
1627 list_del(&mlx5_ib_vma_priv_data->list); 1629 list_del(&mlx5_ib_vma_priv_data->list);
1630 mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
1628 kfree(mlx5_ib_vma_priv_data); 1631 kfree(mlx5_ib_vma_priv_data);
1629} 1632}
1630 1633
@@ -1644,10 +1647,13 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
1644 return -ENOMEM; 1647 return -ENOMEM;
1645 1648
1646 vma_prv->vma = vma; 1649 vma_prv->vma = vma;
1650 vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
1647 vma->vm_private_data = vma_prv; 1651 vma->vm_private_data = vma_prv;
1648 vma->vm_ops = &mlx5_ib_vm_ops; 1652 vma->vm_ops = &mlx5_ib_vm_ops;
1649 1653
1654 mutex_lock(&ctx->vma_private_list_mutex);
1650 list_add(&vma_prv->list, vma_head); 1655 list_add(&vma_prv->list, vma_head);
1656 mutex_unlock(&ctx->vma_private_list_mutex);
1651 1657
1652 return 0; 1658 return 0;
1653} 1659}
@@ -1690,6 +1696,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
1690 * mlx5_ib_vma_close. 1696 * mlx5_ib_vma_close.
1691 */ 1697 */
1692 down_write(&owning_mm->mmap_sem); 1698 down_write(&owning_mm->mmap_sem);
1699 mutex_lock(&context->vma_private_list_mutex);
1693 list_for_each_entry_safe(vma_private, n, &context->vma_private_list, 1700 list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
1694 list) { 1701 list) {
1695 vma = vma_private->vma; 1702 vma = vma_private->vma;
@@ -1704,6 +1711,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
1704 list_del(&vma_private->list); 1711 list_del(&vma_private->list);
1705 kfree(vma_private); 1712 kfree(vma_private);
1706 } 1713 }
1714 mutex_unlock(&context->vma_private_list_mutex);
1707 up_write(&owning_mm->mmap_sem); 1715 up_write(&owning_mm->mmap_sem);
1708 mmput(owning_mm); 1716 mmput(owning_mm);
1709 put_task_struct(owning_process); 1717 put_task_struct(owning_process);
@@ -3737,34 +3745,6 @@ free:
3737 return ret; 3745 return ret;
3738} 3746}
3739 3747
3740static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
3741 struct mlx5_ib_port *port,
3742 struct rdma_hw_stats *stats)
3743{
3744 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
3745 void *out;
3746 int ret, i;
3747 int offset = port->cnts.num_q_counters;
3748
3749 out = kvzalloc(outlen, GFP_KERNEL);
3750 if (!out)
3751 return -ENOMEM;
3752
3753 ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen);
3754 if (ret)
3755 goto free;
3756
3757 for (i = 0; i < port->cnts.num_cong_counters; i++) {
3758 stats->value[i + offset] =
3759 be64_to_cpup((__be64 *)(out +
3760 port->cnts.offsets[i + offset]));
3761 }
3762
3763free:
3764 kvfree(out);
3765 return ret;
3766}
3767
3768static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, 3748static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
3769 struct rdma_hw_stats *stats, 3749 struct rdma_hw_stats *stats,
3770 u8 port_num, int index) 3750 u8 port_num, int index)
@@ -3782,7 +3762,12 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
3782 num_counters = port->cnts.num_q_counters; 3762 num_counters = port->cnts.num_q_counters;
3783 3763
3784 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 3764 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
3785 ret = mlx5_ib_query_cong_counters(dev, port, stats); 3765 ret = mlx5_lag_query_cong_counters(dev->mdev,
3766 stats->value +
3767 port->cnts.num_q_counters,
3768 port->cnts.num_cong_counters,
3769 port->cnts.offsets +
3770 port->cnts.num_q_counters);
3786 if (ret) 3771 if (ret)
3787 return ret; 3772 return ret;
3788 num_counters += port->cnts.num_cong_counters; 3773 num_counters += port->cnts.num_cong_counters;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 6dd8cac78de2..2c5f3533bbc9 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -115,6 +115,8 @@ enum {
115struct mlx5_ib_vma_private_data { 115struct mlx5_ib_vma_private_data {
116 struct list_head list; 116 struct list_head list;
117 struct vm_area_struct *vma; 117 struct vm_area_struct *vma;
118 /* protect vma_private_list add/del */
119 struct mutex *vma_private_list_mutex;
118}; 120};
119 121
120struct mlx5_ib_ucontext { 122struct mlx5_ib_ucontext {
@@ -129,6 +131,8 @@ struct mlx5_ib_ucontext {
129 /* Transport Domain number */ 131 /* Transport Domain number */
130 u32 tdn; 132 u32 tdn;
131 struct list_head vma_private_list; 133 struct list_head vma_private_list;
134 /* protect vma_private_list add/del */
135 struct mutex vma_private_list_mutex;
132 136
133 unsigned long upd_xlt_page; 137 unsigned long upd_xlt_page;
134 /* protect ODP/KSM */ 138 /* protect ODP/KSM */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index ee0ee1f9994b..d109fe8290a7 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1637,6 +1637,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1637 MLX5_SET(mkc, mkc, access_mode, mr->access_mode); 1637 MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
1638 MLX5_SET(mkc, mkc, umr_en, 1); 1638 MLX5_SET(mkc, mkc, umr_en, 1);
1639 1639
1640 mr->ibmr.device = pd->device;
1640 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); 1641 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
1641 if (err) 1642 if (err)
1642 goto err_destroy_psv; 1643 goto err_destroy_psv;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 63bc2efc34eb..4f7bd3b6a315 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -94,7 +94,7 @@ struct pvrdma_cq {
94 u32 cq_handle; 94 u32 cq_handle;
95 bool is_kernel; 95 bool is_kernel;
96 atomic_t refcnt; 96 atomic_t refcnt;
97 wait_queue_head_t wait; 97 struct completion free;
98}; 98};
99 99
100struct pvrdma_id_table { 100struct pvrdma_id_table {
@@ -175,7 +175,7 @@ struct pvrdma_srq {
175 u32 srq_handle; 175 u32 srq_handle;
176 int npages; 176 int npages;
177 refcount_t refcnt; 177 refcount_t refcnt;
178 wait_queue_head_t wait; 178 struct completion free;
179}; 179};
180 180
181struct pvrdma_qp { 181struct pvrdma_qp {
@@ -197,7 +197,7 @@ struct pvrdma_qp {
197 bool is_kernel; 197 bool is_kernel;
198 struct mutex mutex; /* QP state mutex. */ 198 struct mutex mutex; /* QP state mutex. */
199 atomic_t refcnt; 199 atomic_t refcnt;
200 wait_queue_head_t wait; 200 struct completion free;
201}; 201};
202 202
203struct pvrdma_dev { 203struct pvrdma_dev {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index 3562c0c30492..e529622cefad 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -179,7 +179,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
179 pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0); 179 pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);
180 180
181 atomic_set(&cq->refcnt, 1); 181 atomic_set(&cq->refcnt, 1);
182 init_waitqueue_head(&cq->wait); 182 init_completion(&cq->free);
183 spin_lock_init(&cq->cq_lock); 183 spin_lock_init(&cq->cq_lock);
184 184
185 memset(cmd, 0, sizeof(*cmd)); 185 memset(cmd, 0, sizeof(*cmd));
@@ -230,8 +230,9 @@ err_cq:
230 230
231static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) 231static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
232{ 232{
233 atomic_dec(&cq->refcnt); 233 if (atomic_dec_and_test(&cq->refcnt))
234 wait_event(cq->wait, !atomic_read(&cq->refcnt)); 234 complete(&cq->free);
235 wait_for_completion(&cq->free);
235 236
236 if (!cq->is_kernel) 237 if (!cq->is_kernel)
237 ib_umem_release(cq->umem); 238 ib_umem_release(cq->umem);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 1f4e18717a00..e92681878c93 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -346,9 +346,8 @@ static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
346 ibqp->event_handler(&e, ibqp->qp_context); 346 ibqp->event_handler(&e, ibqp->qp_context);
347 } 347 }
348 if (qp) { 348 if (qp) {
349 atomic_dec(&qp->refcnt); 349 if (atomic_dec_and_test(&qp->refcnt))
350 if (atomic_read(&qp->refcnt) == 0) 350 complete(&qp->free);
351 wake_up(&qp->wait);
352 } 351 }
353} 352}
354 353
@@ -373,9 +372,8 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
373 ibcq->event_handler(&e, ibcq->cq_context); 372 ibcq->event_handler(&e, ibcq->cq_context);
374 } 373 }
375 if (cq) { 374 if (cq) {
376 atomic_dec(&cq->refcnt); 375 if (atomic_dec_and_test(&cq->refcnt))
377 if (atomic_read(&cq->refcnt) == 0) 376 complete(&cq->free);
378 wake_up(&cq->wait);
379 } 377 }
380} 378}
381 379
@@ -404,7 +402,7 @@ static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type)
404 } 402 }
405 if (srq) { 403 if (srq) {
406 if (refcount_dec_and_test(&srq->refcnt)) 404 if (refcount_dec_and_test(&srq->refcnt))
407 wake_up(&srq->wait); 405 complete(&srq->free);
408 } 406 }
409} 407}
410 408
@@ -539,9 +537,8 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
539 if (cq && cq->ibcq.comp_handler) 537 if (cq && cq->ibcq.comp_handler)
540 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); 538 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
541 if (cq) { 539 if (cq) {
542 atomic_dec(&cq->refcnt); 540 if (atomic_dec_and_test(&cq->refcnt))
543 if (atomic_read(&cq->refcnt)) 541 complete(&cq->free);
544 wake_up(&cq->wait);
545 } 542 }
546 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); 543 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
547 } 544 }
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 10420a18d02f..4059308e1454 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -246,7 +246,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
246 spin_lock_init(&qp->rq.lock); 246 spin_lock_init(&qp->rq.lock);
247 mutex_init(&qp->mutex); 247 mutex_init(&qp->mutex);
248 atomic_set(&qp->refcnt, 1); 248 atomic_set(&qp->refcnt, 1);
249 init_waitqueue_head(&qp->wait); 249 init_completion(&qp->free);
250 250
251 qp->state = IB_QPS_RESET; 251 qp->state = IB_QPS_RESET;
252 252
@@ -428,8 +428,16 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp)
428 428
429 pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); 429 pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
430 430
431 atomic_dec(&qp->refcnt); 431 if (atomic_dec_and_test(&qp->refcnt))
432 wait_event(qp->wait, !atomic_read(&qp->refcnt)); 432 complete(&qp->free);
433 wait_for_completion(&qp->free);
434
435 if (!qp->is_kernel) {
436 if (qp->rumem)
437 ib_umem_release(qp->rumem);
438 if (qp->sumem)
439 ib_umem_release(qp->sumem);
440 }
433 441
434 pvrdma_page_dir_cleanup(dev, &qp->pdir); 442 pvrdma_page_dir_cleanup(dev, &qp->pdir);
435 443
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index 826ccb864596..5acebb1ef631 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -149,7 +149,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
149 149
150 spin_lock_init(&srq->lock); 150 spin_lock_init(&srq->lock);
151 refcount_set(&srq->refcnt, 1); 151 refcount_set(&srq->refcnt, 1);
152 init_waitqueue_head(&srq->wait); 152 init_completion(&srq->free);
153 153
154 dev_dbg(&dev->pdev->dev, 154 dev_dbg(&dev->pdev->dev,
155 "create shared receive queue from user space\n"); 155 "create shared receive queue from user space\n");
@@ -236,8 +236,9 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq)
236 dev->srq_tbl[srq->srq_handle] = NULL; 236 dev->srq_tbl[srq->srq_handle] = NULL;
237 spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); 237 spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);
238 238
239 refcount_dec(&srq->refcnt); 239 if (refcount_dec_and_test(&srq->refcnt))
240 wait_event(srq->wait, !refcount_read(&srq->refcnt)); 240 complete(&srq->free);
241 wait_for_completion(&srq->free);
241 242
242 /* There is no support for kernel clients, so this is safe. */ 243 /* There is no support for kernel clients, so this is safe. */
243 ib_umem_release(srq->umem); 244 ib_umem_release(srq->umem);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 87f4bd99cdf7..2c13123bfd69 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1145,6 +1145,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
1145 noio_flag = memalloc_noio_save(); 1145 noio_flag = memalloc_noio_save();
1146 p->tx_ring = vzalloc(ipoib_sendq_size * sizeof(*p->tx_ring)); 1146 p->tx_ring = vzalloc(ipoib_sendq_size * sizeof(*p->tx_ring));
1147 if (!p->tx_ring) { 1147 if (!p->tx_ring) {
1148 memalloc_noio_restore(noio_flag);
1148 ret = -ENOMEM; 1149 ret = -ENOMEM;
1149 goto err_tx; 1150 goto err_tx;
1150 } 1151 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 3b96cdaf9a83..e6151a29c412 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1236,13 +1236,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
1236 ipoib_ib_dev_down(dev); 1236 ipoib_ib_dev_down(dev);
1237 1237
1238 if (level == IPOIB_FLUSH_HEAVY) { 1238 if (level == IPOIB_FLUSH_HEAVY) {
1239 rtnl_lock();
1240 if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) 1239 if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
1241 ipoib_ib_dev_stop(dev); 1240 ipoib_ib_dev_stop(dev);
1242 1241
1243 result = ipoib_ib_dev_open(dev); 1242 if (ipoib_ib_dev_open(dev))
1244 rtnl_unlock();
1245 if (result)
1246 return; 1243 return;
1247 1244
1248 if (netif_queue_stopped(dev)) 1245 if (netif_queue_stopped(dev))
@@ -1282,7 +1279,9 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
1282 struct ipoib_dev_priv *priv = 1279 struct ipoib_dev_priv *priv =
1283 container_of(work, struct ipoib_dev_priv, flush_heavy); 1280 container_of(work, struct ipoib_dev_priv, flush_heavy);
1284 1281
1282 rtnl_lock();
1285 __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0); 1283 __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0);
1284 rtnl_unlock();
1286} 1285}
1287 1286
1288void ipoib_ib_dev_cleanup(struct net_device *dev) 1287void ipoib_ib_dev_cleanup(struct net_device *dev)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 7d5eb004091d..97baf88d9505 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4184,7 +4184,7 @@ static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
4184 struct irq_cfg *cfg); 4184 struct irq_cfg *cfg);
4185 4185
4186static int irq_remapping_activate(struct irq_domain *domain, 4186static int irq_remapping_activate(struct irq_domain *domain,
4187 struct irq_data *irq_data, bool early) 4187 struct irq_data *irq_data, bool reserve)
4188{ 4188{
4189 struct amd_ir_data *data = irq_data->chip_data; 4189 struct amd_ir_data *data = irq_data->chip_data;
4190 struct irq_2_irte *irte_info = &data->irq_2_irte; 4190 struct irq_2_irte *irte_info = &data->irq_2_irte;
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 76a193c7fcfc..66f69af2c219 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1397,7 +1397,7 @@ static void intel_irq_remapping_free(struct irq_domain *domain,
1397} 1397}
1398 1398
1399static int intel_irq_remapping_activate(struct irq_domain *domain, 1399static int intel_irq_remapping_activate(struct irq_domain *domain,
1400 struct irq_data *irq_data, bool early) 1400 struct irq_data *irq_data, bool reserve)
1401{ 1401{
1402 intel_ir_reconfigure_irte(irq_data, true); 1402 intel_ir_reconfigure_irte(irq_data, true);
1403 return 0; 1403 return 0;
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 4039e64cd342..06f025fd5726 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2303,7 +2303,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
2303} 2303}
2304 2304
2305static int its_irq_domain_activate(struct irq_domain *domain, 2305static int its_irq_domain_activate(struct irq_domain *domain,
2306 struct irq_data *d, bool early) 2306 struct irq_data *d, bool reserve)
2307{ 2307{
2308 struct its_device *its_dev = irq_data_get_irq_chip_data(d); 2308 struct its_device *its_dev = irq_data_get_irq_chip_data(d);
2309 u32 event = its_get_event_id(d); 2309 u32 event = its_get_event_id(d);
@@ -2818,7 +2818,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
2818} 2818}
2819 2819
2820static int its_vpe_irq_domain_activate(struct irq_domain *domain, 2820static int its_vpe_irq_domain_activate(struct irq_domain *domain,
2821 struct irq_data *d, bool early) 2821 struct irq_data *d, bool reserve)
2822{ 2822{
2823 struct its_vpe *vpe = irq_data_get_irq_chip_data(d); 2823 struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
2824 struct its_node *its; 2824 struct its_node *its;
diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index 06f29cf5018a..cee59fe1321c 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -342,6 +342,9 @@ static irqreturn_t intc_irqpin_shared_irq_handler(int irq, void *dev_id)
342 */ 342 */
343static struct lock_class_key intc_irqpin_irq_lock_class; 343static struct lock_class_key intc_irqpin_irq_lock_class;
344 344
345/* And this is for the request mutex */
346static struct lock_class_key intc_irqpin_irq_request_class;
347
345static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, 348static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq,
346 irq_hw_number_t hw) 349 irq_hw_number_t hw)
347{ 350{
@@ -352,7 +355,8 @@ static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq,
352 355
353 intc_irqpin_dbg(&p->irq[hw], "map"); 356 intc_irqpin_dbg(&p->irq[hw], "map");
354 irq_set_chip_data(virq, h->host_data); 357 irq_set_chip_data(virq, h->host_data);
355 irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class); 358 irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class,
359 &intc_irqpin_irq_request_class);
356 irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); 360 irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq);
357 return 0; 361 return 0;
358} 362}
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index fd83c7f77a95..f3654fd2eaf3 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -186,7 +186,7 @@ void led_blink_set(struct led_classdev *led_cdev,
186 unsigned long *delay_on, 186 unsigned long *delay_on,
187 unsigned long *delay_off) 187 unsigned long *delay_off)
188{ 188{
189 del_timer_sync(&led_cdev->blink_timer); 189 led_stop_software_blink(led_cdev);
190 190
191 clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags); 191 clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags);
192 clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags); 192 clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags);
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index b8ac591aaaa7..c546b567f3b5 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1611,7 +1611,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
1611 int l; 1611 int l;
1612 struct dm_buffer *b, *tmp; 1612 struct dm_buffer *b, *tmp;
1613 unsigned long freed = 0; 1613 unsigned long freed = 0;
1614 unsigned long count = nr_to_scan; 1614 unsigned long count = c->n_buffers[LIST_CLEAN] +
1615 c->n_buffers[LIST_DIRTY];
1615 unsigned long retain_target = get_retain_buffers(c); 1616 unsigned long retain_target = get_retain_buffers(c);
1616 1617
1617 for (l = 0; l < LIST_SIZE; l++) { 1618 for (l = 0; l < LIST_SIZE; l++) {
@@ -1647,8 +1648,11 @@ static unsigned long
1647dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) 1648dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
1648{ 1649{
1649 struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker); 1650 struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
1651 unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) +
1652 READ_ONCE(c->n_buffers[LIST_DIRTY]);
1653 unsigned long retain_target = get_retain_buffers(c);
1650 1654
1651 return READ_ONCE(c->n_buffers[LIST_CLEAN]) + READ_ONCE(c->n_buffers[LIST_DIRTY]); 1655 return (count < retain_target) ? 0 : (count - retain_target);
1652} 1656}
1653 1657
1654/* 1658/*
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index cf23a14f9c6a..47407e43b96a 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -3472,18 +3472,18 @@ static int __init dm_cache_init(void)
3472{ 3472{
3473 int r; 3473 int r;
3474 3474
3475 r = dm_register_target(&cache_target);
3476 if (r) {
3477 DMERR("cache target registration failed: %d", r);
3478 return r;
3479 }
3480
3481 migration_cache = KMEM_CACHE(dm_cache_migration, 0); 3475 migration_cache = KMEM_CACHE(dm_cache_migration, 0);
3482 if (!migration_cache) { 3476 if (!migration_cache) {
3483 dm_unregister_target(&cache_target); 3477 dm_unregister_target(&cache_target);
3484 return -ENOMEM; 3478 return -ENOMEM;
3485 } 3479 }
3486 3480
3481 r = dm_register_target(&cache_target);
3482 if (r) {
3483 DMERR("cache target registration failed: %d", r);
3484 return r;
3485 }
3486
3487 return 0; 3487 return 0;
3488} 3488}
3489 3489
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c8faa2b85842..f7810cc869ac 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -458,6 +458,38 @@ do { \
458} while (0) 458} while (0)
459 459
460/* 460/*
461 * Check whether bios must be queued in the device-mapper core rather
462 * than here in the target.
463 *
464 * If MPATHF_QUEUE_IF_NO_PATH and MPATHF_SAVED_QUEUE_IF_NO_PATH hold
465 * the same value then we are not between multipath_presuspend()
466 * and multipath_resume() calls and we have no need to check
467 * for the DMF_NOFLUSH_SUSPENDING flag.
468 */
469static bool __must_push_back(struct multipath *m, unsigned long flags)
470{
471 return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) !=
472 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &flags)) &&
473 dm_noflush_suspending(m->ti));
474}
475
476/*
477 * Following functions use READ_ONCE to get atomic access to
478 * all m->flags to avoid taking spinlock
479 */
480static bool must_push_back_rq(struct multipath *m)
481{
482 unsigned long flags = READ_ONCE(m->flags);
483 return test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) || __must_push_back(m, flags);
484}
485
486static bool must_push_back_bio(struct multipath *m)
487{
488 unsigned long flags = READ_ONCE(m->flags);
489 return __must_push_back(m, flags);
490}
491
492/*
461 * Map cloned requests (request-based multipath) 493 * Map cloned requests (request-based multipath)
462 */ 494 */
463static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, 495static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
@@ -478,7 +510,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
478 pgpath = choose_pgpath(m, nr_bytes); 510 pgpath = choose_pgpath(m, nr_bytes);
479 511
480 if (!pgpath) { 512 if (!pgpath) {
481 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) 513 if (must_push_back_rq(m))
482 return DM_MAPIO_DELAY_REQUEUE; 514 return DM_MAPIO_DELAY_REQUEUE;
483 dm_report_EIO(m); /* Failed */ 515 dm_report_EIO(m); /* Failed */
484 return DM_MAPIO_KILL; 516 return DM_MAPIO_KILL;
@@ -553,7 +585,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
553 } 585 }
554 586
555 if (!pgpath) { 587 if (!pgpath) {
556 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) 588 if (must_push_back_bio(m))
557 return DM_MAPIO_REQUEUE; 589 return DM_MAPIO_REQUEUE;
558 dm_report_EIO(m); 590 dm_report_EIO(m);
559 return DM_MAPIO_KILL; 591 return DM_MAPIO_KILL;
@@ -651,8 +683,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
651 assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags, 683 assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags,
652 (save_old_value && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) || 684 (save_old_value && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) ||
653 (!save_old_value && queue_if_no_path)); 685 (!save_old_value && queue_if_no_path));
654 assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, 686 assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path);
655 queue_if_no_path || dm_noflush_suspending(m->ti));
656 spin_unlock_irqrestore(&m->lock, flags); 687 spin_unlock_irqrestore(&m->lock, flags);
657 688
658 if (!queue_if_no_path) { 689 if (!queue_if_no_path) {
@@ -1486,7 +1517,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
1486 fail_path(pgpath); 1517 fail_path(pgpath);
1487 1518
1488 if (atomic_read(&m->nr_valid_paths) == 0 && 1519 if (atomic_read(&m->nr_valid_paths) == 0 &&
1489 !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { 1520 !must_push_back_rq(m)) {
1490 if (error == BLK_STS_IOERR) 1521 if (error == BLK_STS_IOERR)
1491 dm_report_EIO(m); 1522 dm_report_EIO(m);
1492 /* complete with the original error */ 1523 /* complete with the original error */
@@ -1521,8 +1552,12 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
1521 1552
1522 if (atomic_read(&m->nr_valid_paths) == 0 && 1553 if (atomic_read(&m->nr_valid_paths) == 0 &&
1523 !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { 1554 !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
1524 dm_report_EIO(m); 1555 if (must_push_back_bio(m)) {
1525 *error = BLK_STS_IOERR; 1556 r = DM_ENDIO_REQUEUE;
1557 } else {
1558 dm_report_EIO(m);
1559 *error = BLK_STS_IOERR;
1560 }
1526 goto done; 1561 goto done;
1527 } 1562 }
1528 1563
@@ -1957,13 +1992,6 @@ static int __init dm_multipath_init(void)
1957{ 1992{
1958 int r; 1993 int r;
1959 1994
1960 r = dm_register_target(&multipath_target);
1961 if (r < 0) {
1962 DMERR("request-based register failed %d", r);
1963 r = -EINVAL;
1964 goto bad_register_target;
1965 }
1966
1967 kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0); 1995 kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
1968 if (!kmultipathd) { 1996 if (!kmultipathd) {
1969 DMERR("failed to create workqueue kmpathd"); 1997 DMERR("failed to create workqueue kmpathd");
@@ -1985,13 +2013,20 @@ static int __init dm_multipath_init(void)
1985 goto bad_alloc_kmpath_handlerd; 2013 goto bad_alloc_kmpath_handlerd;
1986 } 2014 }
1987 2015
2016 r = dm_register_target(&multipath_target);
2017 if (r < 0) {
2018 DMERR("request-based register failed %d", r);
2019 r = -EINVAL;
2020 goto bad_register_target;
2021 }
2022
1988 return 0; 2023 return 0;
1989 2024
2025bad_register_target:
2026 destroy_workqueue(kmpath_handlerd);
1990bad_alloc_kmpath_handlerd: 2027bad_alloc_kmpath_handlerd:
1991 destroy_workqueue(kmultipathd); 2028 destroy_workqueue(kmultipathd);
1992bad_alloc_kmultipathd: 2029bad_alloc_kmultipathd:
1993 dm_unregister_target(&multipath_target);
1994bad_register_target:
1995 return r; 2030 return r;
1996} 2031}
1997 2032
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 1113b42e1eda..a0613bd8ed00 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -2411,24 +2411,6 @@ static int __init dm_snapshot_init(void)
2411 return r; 2411 return r;
2412 } 2412 }
2413 2413
2414 r = dm_register_target(&snapshot_target);
2415 if (r < 0) {
2416 DMERR("snapshot target register failed %d", r);
2417 goto bad_register_snapshot_target;
2418 }
2419
2420 r = dm_register_target(&origin_target);
2421 if (r < 0) {
2422 DMERR("Origin target register failed %d", r);
2423 goto bad_register_origin_target;
2424 }
2425
2426 r = dm_register_target(&merge_target);
2427 if (r < 0) {
2428 DMERR("Merge target register failed %d", r);
2429 goto bad_register_merge_target;
2430 }
2431
2432 r = init_origin_hash(); 2414 r = init_origin_hash();
2433 if (r) { 2415 if (r) {
2434 DMERR("init_origin_hash failed."); 2416 DMERR("init_origin_hash failed.");
@@ -2449,19 +2431,37 @@ static int __init dm_snapshot_init(void)
2449 goto bad_pending_cache; 2431 goto bad_pending_cache;
2450 } 2432 }
2451 2433
2434 r = dm_register_target(&snapshot_target);
2435 if (r < 0) {
2436 DMERR("snapshot target register failed %d", r);
2437 goto bad_register_snapshot_target;
2438 }
2439
2440 r = dm_register_target(&origin_target);
2441 if (r < 0) {
2442 DMERR("Origin target register failed %d", r);
2443 goto bad_register_origin_target;
2444 }
2445
2446 r = dm_register_target(&merge_target);
2447 if (r < 0) {
2448 DMERR("Merge target register failed %d", r);
2449 goto bad_register_merge_target;
2450 }
2451
2452 return 0; 2452 return 0;
2453 2453
2454bad_pending_cache:
2455 kmem_cache_destroy(exception_cache);
2456bad_exception_cache:
2457 exit_origin_hash();
2458bad_origin_hash:
2459 dm_unregister_target(&merge_target);
2460bad_register_merge_target: 2454bad_register_merge_target:
2461 dm_unregister_target(&origin_target); 2455 dm_unregister_target(&origin_target);
2462bad_register_origin_target: 2456bad_register_origin_target:
2463 dm_unregister_target(&snapshot_target); 2457 dm_unregister_target(&snapshot_target);
2464bad_register_snapshot_target: 2458bad_register_snapshot_target:
2459 kmem_cache_destroy(pending_cache);
2460bad_pending_cache:
2461 kmem_cache_destroy(exception_cache);
2462bad_exception_cache:
2463 exit_origin_hash();
2464bad_origin_hash:
2465 dm_exception_store_exit(); 2465 dm_exception_store_exit();
2466 2466
2467 return r; 2467 return r;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 88130b5d95f9..aaffd0c0ee9a 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -453,14 +453,15 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
453 453
454 refcount_set(&dd->count, 1); 454 refcount_set(&dd->count, 1);
455 list_add(&dd->list, &t->devices); 455 list_add(&dd->list, &t->devices);
456 goto out;
456 457
457 } else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) { 458 } else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
458 r = upgrade_mode(dd, mode, t->md); 459 r = upgrade_mode(dd, mode, t->md);
459 if (r) 460 if (r)
460 return r; 461 return r;
461 refcount_inc(&dd->count);
462 } 462 }
463 463 refcount_inc(&dd->count);
464out:
464 *result = dd->dm_dev; 465 *result = dd->dm_dev;
465 return 0; 466 return 0;
466} 467}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 89e5dff9b4cf..f91d771fff4b 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4355,30 +4355,28 @@ static struct target_type thin_target = {
4355 4355
4356static int __init dm_thin_init(void) 4356static int __init dm_thin_init(void)
4357{ 4357{
4358 int r; 4358 int r = -ENOMEM;
4359 4359
4360 pool_table_init(); 4360 pool_table_init();
4361 4361
4362 _new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0);
4363 if (!_new_mapping_cache)
4364 return r;
4365
4362 r = dm_register_target(&thin_target); 4366 r = dm_register_target(&thin_target);
4363 if (r) 4367 if (r)
4364 return r; 4368 goto bad_new_mapping_cache;
4365 4369
4366 r = dm_register_target(&pool_target); 4370 r = dm_register_target(&pool_target);
4367 if (r) 4371 if (r)
4368 goto bad_pool_target; 4372 goto bad_thin_target;
4369
4370 r = -ENOMEM;
4371
4372 _new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0);
4373 if (!_new_mapping_cache)
4374 goto bad_new_mapping_cache;
4375 4373
4376 return 0; 4374 return 0;
4377 4375
4378bad_new_mapping_cache: 4376bad_thin_target:
4379 dm_unregister_target(&pool_target);
4380bad_pool_target:
4381 dm_unregister_target(&thin_target); 4377 dm_unregister_target(&thin_target);
4378bad_new_mapping_cache:
4379 kmem_cache_destroy(_new_mapping_cache);
4382 4380
4383 return r; 4381 return r;
4384} 4382}
diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c
index 09cf3699e354..a307832d7e45 100644
--- a/drivers/mfd/arizona-irq.c
+++ b/drivers/mfd/arizona-irq.c
@@ -184,6 +184,7 @@ static struct irq_chip arizona_irq_chip = {
184}; 184};
185 185
186static struct lock_class_key arizona_irq_lock_class; 186static struct lock_class_key arizona_irq_lock_class;
187static struct lock_class_key arizona_irq_request_class;
187 188
188static int arizona_irq_map(struct irq_domain *h, unsigned int virq, 189static int arizona_irq_map(struct irq_domain *h, unsigned int virq,
189 irq_hw_number_t hw) 190 irq_hw_number_t hw)
@@ -191,7 +192,8 @@ static int arizona_irq_map(struct irq_domain *h, unsigned int virq,
191 struct arizona *data = h->host_data; 192 struct arizona *data = h->host_data;
192 193
193 irq_set_chip_data(virq, data); 194 irq_set_chip_data(virq, data);
194 irq_set_lockdep_class(virq, &arizona_irq_lock_class); 195 irq_set_lockdep_class(virq, &arizona_irq_lock_class,
196 &arizona_irq_request_class);
195 irq_set_chip_and_handler(virq, &arizona_irq_chip, handle_simple_irq); 197 irq_set_chip_and_handler(virq, &arizona_irq_chip, handle_simple_irq);
196 irq_set_nested_thread(virq, 1); 198 irq_set_nested_thread(virq, 1);
197 irq_set_noprobe(virq); 199 irq_set_noprobe(virq);
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
index c9714072e224..59c82cdcf48d 100644
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -377,6 +377,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
377 u8 *ptr; 377 u8 *ptr;
378 u8 *rx_buf; 378 u8 *rx_buf;
379 u8 sum; 379 u8 sum;
380 u8 rx_byte;
380 int ret = 0, final_ret; 381 int ret = 0, final_ret;
381 382
382 len = cros_ec_prepare_tx(ec_dev, ec_msg); 383 len = cros_ec_prepare_tx(ec_dev, ec_msg);
@@ -421,25 +422,22 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
421 if (!ret) { 422 if (!ret) {
422 /* Verify that EC can process command */ 423 /* Verify that EC can process command */
423 for (i = 0; i < len; i++) { 424 for (i = 0; i < len; i++) {
424 switch (rx_buf[i]) { 425 rx_byte = rx_buf[i];
425 case EC_SPI_PAST_END: 426 if (rx_byte == EC_SPI_PAST_END ||
426 case EC_SPI_RX_BAD_DATA: 427 rx_byte == EC_SPI_RX_BAD_DATA ||
427 case EC_SPI_NOT_READY: 428 rx_byte == EC_SPI_NOT_READY) {
428 ret = -EAGAIN; 429 ret = -EREMOTEIO;
429 ec_msg->result = EC_RES_IN_PROGRESS;
430 default:
431 break; 430 break;
432 } 431 }
433 if (ret)
434 break;
435 } 432 }
436 if (!ret)
437 ret = cros_ec_spi_receive_packet(ec_dev,
438 ec_msg->insize + sizeof(*response));
439 } else {
440 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
441 } 433 }
442 434
435 if (!ret)
436 ret = cros_ec_spi_receive_packet(ec_dev,
437 ec_msg->insize + sizeof(*response));
438 else
439 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
440
443 final_ret = terminate_request(ec_dev); 441 final_ret = terminate_request(ec_dev);
444 442
445 spi_bus_unlock(ec_spi->spi->master); 443 spi_bus_unlock(ec_spi->spi->master);
@@ -508,6 +506,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
508 int i, len; 506 int i, len;
509 u8 *ptr; 507 u8 *ptr;
510 u8 *rx_buf; 508 u8 *rx_buf;
509 u8 rx_byte;
511 int sum; 510 int sum;
512 int ret = 0, final_ret; 511 int ret = 0, final_ret;
513 512
@@ -544,25 +543,22 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
544 if (!ret) { 543 if (!ret) {
545 /* Verify that EC can process command */ 544 /* Verify that EC can process command */
546 for (i = 0; i < len; i++) { 545 for (i = 0; i < len; i++) {
547 switch (rx_buf[i]) { 546 rx_byte = rx_buf[i];
548 case EC_SPI_PAST_END: 547 if (rx_byte == EC_SPI_PAST_END ||
549 case EC_SPI_RX_BAD_DATA: 548 rx_byte == EC_SPI_RX_BAD_DATA ||
550 case EC_SPI_NOT_READY: 549 rx_byte == EC_SPI_NOT_READY) {
551 ret = -EAGAIN; 550 ret = -EREMOTEIO;
552 ec_msg->result = EC_RES_IN_PROGRESS;
553 default:
554 break; 551 break;
555 } 552 }
556 if (ret)
557 break;
558 } 553 }
559 if (!ret)
560 ret = cros_ec_spi_receive_response(ec_dev,
561 ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
562 } else {
563 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
564 } 554 }
565 555
556 if (!ret)
557 ret = cros_ec_spi_receive_response(ec_dev,
558 ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
559 else
560 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
561
566 final_ret = terminate_request(ec_dev); 562 final_ret = terminate_request(ec_dev);
567 563
568 spi_bus_unlock(ec_spi->spi->master); 564 spi_bus_unlock(ec_spi->spi->master);
@@ -667,6 +663,7 @@ static int cros_ec_spi_probe(struct spi_device *spi)
667 sizeof(struct ec_response_get_protocol_info); 663 sizeof(struct ec_response_get_protocol_info);
668 ec_dev->dout_size = sizeof(struct ec_host_request); 664 ec_dev->dout_size = sizeof(struct ec_host_request);
669 665
666 ec_spi->last_transfer_ns = ktime_get_ns();
670 667
671 err = cros_ec_register(ec_dev); 668 err = cros_ec_register(ec_dev);
672 if (err) { 669 if (err) {
diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c
index da16bf45fab4..dc94ffc6321a 100644
--- a/drivers/mfd/twl4030-audio.c
+++ b/drivers/mfd/twl4030-audio.c
@@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void)
159EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk); 159EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
160 160
161static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata, 161static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
162 struct device_node *node) 162 struct device_node *parent)
163{ 163{
164 struct device_node *node;
165
164 if (pdata && pdata->codec) 166 if (pdata && pdata->codec)
165 return true; 167 return true;
166 168
167 if (of_find_node_by_name(node, "codec")) 169 node = of_get_child_by_name(parent, "codec");
170 if (node) {
171 of_node_put(node);
168 return true; 172 return true;
173 }
169 174
170 return false; 175 return false;
171} 176}
diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c
index d66502d36ba0..dd19f17a1b63 100644
--- a/drivers/mfd/twl6040.c
+++ b/drivers/mfd/twl6040.c
@@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = {
97}; 97};
98 98
99 99
100static bool twl6040_has_vibra(struct device_node *node) 100static bool twl6040_has_vibra(struct device_node *parent)
101{ 101{
102#ifdef CONFIG_OF 102 struct device_node *node;
103 if (of_find_node_by_name(node, "vibra")) 103
104 node = of_get_child_by_name(parent, "vibra");
105 if (node) {
106 of_node_put(node);
104 return true; 107 return true;
105#endif 108 }
109
106 return false; 110 return false;
107} 111}
108 112
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 305a7a464d09..4d63ac8a82e0 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -562,7 +562,7 @@ static ssize_t at24_eeprom_write_i2c(struct at24_data *at24, const char *buf,
562static int at24_read(void *priv, unsigned int off, void *val, size_t count) 562static int at24_read(void *priv, unsigned int off, void *val, size_t count)
563{ 563{
564 struct at24_data *at24 = priv; 564 struct at24_data *at24 = priv;
565 struct i2c_client *client; 565 struct device *dev = &at24->client[0]->dev;
566 char *buf = val; 566 char *buf = val;
567 int ret; 567 int ret;
568 568
@@ -572,11 +572,9 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
572 if (off + count > at24->chip.byte_len) 572 if (off + count > at24->chip.byte_len)
573 return -EINVAL; 573 return -EINVAL;
574 574
575 client = at24_translate_offset(at24, &off); 575 ret = pm_runtime_get_sync(dev);
576
577 ret = pm_runtime_get_sync(&client->dev);
578 if (ret < 0) { 576 if (ret < 0) {
579 pm_runtime_put_noidle(&client->dev); 577 pm_runtime_put_noidle(dev);
580 return ret; 578 return ret;
581 } 579 }
582 580
@@ -592,7 +590,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
592 status = at24->read_func(at24, buf, off, count); 590 status = at24->read_func(at24, buf, off, count);
593 if (status < 0) { 591 if (status < 0) {
594 mutex_unlock(&at24->lock); 592 mutex_unlock(&at24->lock);
595 pm_runtime_put(&client->dev); 593 pm_runtime_put(dev);
596 return status; 594 return status;
597 } 595 }
598 buf += status; 596 buf += status;
@@ -602,7 +600,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
602 600
603 mutex_unlock(&at24->lock); 601 mutex_unlock(&at24->lock);
604 602
605 pm_runtime_put(&client->dev); 603 pm_runtime_put(dev);
606 604
607 return 0; 605 return 0;
608} 606}
@@ -610,7 +608,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
610static int at24_write(void *priv, unsigned int off, void *val, size_t count) 608static int at24_write(void *priv, unsigned int off, void *val, size_t count)
611{ 609{
612 struct at24_data *at24 = priv; 610 struct at24_data *at24 = priv;
613 struct i2c_client *client; 611 struct device *dev = &at24->client[0]->dev;
614 char *buf = val; 612 char *buf = val;
615 int ret; 613 int ret;
616 614
@@ -620,11 +618,9 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
620 if (off + count > at24->chip.byte_len) 618 if (off + count > at24->chip.byte_len)
621 return -EINVAL; 619 return -EINVAL;
622 620
623 client = at24_translate_offset(at24, &off); 621 ret = pm_runtime_get_sync(dev);
624
625 ret = pm_runtime_get_sync(&client->dev);
626 if (ret < 0) { 622 if (ret < 0) {
627 pm_runtime_put_noidle(&client->dev); 623 pm_runtime_put_noidle(dev);
628 return ret; 624 return ret;
629 } 625 }
630 626
@@ -640,7 +636,7 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
640 status = at24->write_func(at24, buf, off, count); 636 status = at24->write_func(at24, buf, off, count);
641 if (status < 0) { 637 if (status < 0) {
642 mutex_unlock(&at24->lock); 638 mutex_unlock(&at24->lock);
643 pm_runtime_put(&client->dev); 639 pm_runtime_put(dev);
644 return status; 640 return status;
645 } 641 }
646 buf += status; 642 buf += status;
@@ -650,7 +646,7 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
650 646
651 mutex_unlock(&at24->lock); 647 mutex_unlock(&at24->lock);
652 648
653 pm_runtime_put(&client->dev); 649 pm_runtime_put(dev);
654 650
655 return 0; 651 return 0;
656} 652}
@@ -880,7 +876,7 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
880 at24->nvmem_config.reg_read = at24_read; 876 at24->nvmem_config.reg_read = at24_read;
881 at24->nvmem_config.reg_write = at24_write; 877 at24->nvmem_config.reg_write = at24_write;
882 at24->nvmem_config.priv = at24; 878 at24->nvmem_config.priv = at24;
883 at24->nvmem_config.stride = 4; 879 at24->nvmem_config.stride = 1;
884 at24->nvmem_config.word_size = 1; 880 at24->nvmem_config.word_size = 1;
885 at24->nvmem_config.size = chip.byte_len; 881 at24->nvmem_config.size = chip.byte_len;
886 882
diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
index eda38cbe8530..41f2a9f6851d 100644
--- a/drivers/misc/pti.c
+++ b/drivers/misc/pti.c
@@ -32,7 +32,7 @@
32#include <linux/pci.h> 32#include <linux/pci.h>
33#include <linux/mutex.h> 33#include <linux/mutex.h>
34#include <linux/miscdevice.h> 34#include <linux/miscdevice.h>
35#include <linux/pti.h> 35#include <linux/intel-pti.h>
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/uaccess.h> 37#include <linux/uaccess.h>
38 38
diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h
index f06cd91964ce..79a5b985ccf5 100644
--- a/drivers/mmc/core/card.h
+++ b/drivers/mmc/core/card.h
@@ -75,9 +75,11 @@ struct mmc_fixup {
75#define EXT_CSD_REV_ANY (-1u) 75#define EXT_CSD_REV_ANY (-1u)
76 76
77#define CID_MANFID_SANDISK 0x2 77#define CID_MANFID_SANDISK 0x2
78#define CID_MANFID_ATP 0x9
78#define CID_MANFID_TOSHIBA 0x11 79#define CID_MANFID_TOSHIBA 0x11
79#define CID_MANFID_MICRON 0x13 80#define CID_MANFID_MICRON 0x13
80#define CID_MANFID_SAMSUNG 0x15 81#define CID_MANFID_SAMSUNG 0x15
82#define CID_MANFID_APACER 0x27
81#define CID_MANFID_KINGSTON 0x70 83#define CID_MANFID_KINGSTON 0x70
82#define CID_MANFID_HYNIX 0x90 84#define CID_MANFID_HYNIX 0x90
83 85
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index d209fb466979..208a762b87ef 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1290,7 +1290,7 @@ out_err:
1290 1290
1291static void mmc_select_driver_type(struct mmc_card *card) 1291static void mmc_select_driver_type(struct mmc_card *card)
1292{ 1292{
1293 int card_drv_type, drive_strength, drv_type; 1293 int card_drv_type, drive_strength, drv_type = 0;
1294 int fixed_drv_type = card->host->fixed_drv_type; 1294 int fixed_drv_type = card->host->fixed_drv_type;
1295 1295
1296 card_drv_type = card->ext_csd.raw_driver_strength | 1296 card_drv_type = card->ext_csd.raw_driver_strength |
diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
index f664e9cbc9f8..75d317623852 100644
--- a/drivers/mmc/core/quirks.h
+++ b/drivers/mmc/core/quirks.h
@@ -53,6 +53,14 @@ static const struct mmc_fixup mmc_blk_fixups[] = {
53 MMC_QUIRK_BLK_NO_CMD23), 53 MMC_QUIRK_BLK_NO_CMD23),
54 54
55 /* 55 /*
56 * Some SD cards lockup while using CMD23 multiblock transfers.
57 */
58 MMC_FIXUP("AF SD", CID_MANFID_ATP, CID_OEMID_ANY, add_quirk_sd,
59 MMC_QUIRK_BLK_NO_CMD23),
60 MMC_FIXUP("APUSD", CID_MANFID_APACER, 0x5048, add_quirk_sd,
61 MMC_QUIRK_BLK_NO_CMD23),
62
63 /*
56 * Some MMC cards need longer data read timeout than indicated in CSD. 64 * Some MMC cards need longer data read timeout than indicated in CSD.
57 */ 65 */
58 MMC_FIXUP(CID_NAME_ANY, CID_MANFID_MICRON, 0x200, add_quirk_mmc, 66 MMC_FIXUP(CID_NAME_ANY, CID_MANFID_MICRON, 0x200, add_quirk_mmc,
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index f80e911b8843..73b605577447 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1114,7 +1114,7 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs,
1114 if (!ops->oobbuf) 1114 if (!ops->oobbuf)
1115 ops->ooblen = 0; 1115 ops->ooblen = 0;
1116 1116
1117 if (offs < 0 || offs + ops->len >= mtd->size) 1117 if (offs < 0 || offs + ops->len > mtd->size)
1118 return -EINVAL; 1118 return -EINVAL;
1119 1119
1120 if (ops->ooblen) { 1120 if (ops->ooblen) {
diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c
index e0eb51d8c012..dd56a671ea42 100644
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c
@@ -1763,7 +1763,7 @@ try_dmaread:
1763 err = brcmstb_nand_verify_erased_page(mtd, chip, buf, 1763 err = brcmstb_nand_verify_erased_page(mtd, chip, buf,
1764 addr); 1764 addr);
1765 /* erased page bitflips corrected */ 1765 /* erased page bitflips corrected */
1766 if (err > 0) 1766 if (err >= 0)
1767 return err; 1767 return err;
1768 } 1768 }
1769 1769
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
index 484f7fbc3f7d..a8bde6665c24 100644
--- a/drivers/mtd/nand/gpio.c
+++ b/drivers/mtd/nand/gpio.c
@@ -253,9 +253,9 @@ static int gpio_nand_probe(struct platform_device *pdev)
253 goto out_ce; 253 goto out_ce;
254 } 254 }
255 255
256 gpiomtd->nwp = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW); 256 gpiomtd->ale = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
257 if (IS_ERR(gpiomtd->nwp)) { 257 if (IS_ERR(gpiomtd->ale)) {
258 ret = PTR_ERR(gpiomtd->nwp); 258 ret = PTR_ERR(gpiomtd->ale);
259 goto out_ce; 259 goto out_ce;
260 } 260 }
261 261
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 50f8d4a1b983..d4d824ef64e9 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -1067,9 +1067,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
1067 return ret; 1067 return ret;
1068 } 1068 }
1069 1069
1070 /* handle the block mark swapping */
1071 block_mark_swapping(this, payload_virt, auxiliary_virt);
1072
1073 /* Loop over status bytes, accumulating ECC status. */ 1070 /* Loop over status bytes, accumulating ECC status. */
1074 status = auxiliary_virt + nfc_geo->auxiliary_status_offset; 1071 status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
1075 1072
@@ -1158,6 +1155,9 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
1158 max_bitflips = max_t(unsigned int, max_bitflips, *status); 1155 max_bitflips = max_t(unsigned int, max_bitflips, *status);
1159 } 1156 }
1160 1157
1158 /* handle the block mark swapping */
1159 block_mark_swapping(this, buf, auxiliary_virt);
1160
1161 if (oob_required) { 1161 if (oob_required) {
1162 /* 1162 /*
1163 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob() 1163 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob()
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index a7801f6668a5..6315774d72b3 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -338,6 +338,7 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
338 cmode = MV88E6XXX_PORT_STS_CMODE_2500BASEX; 338 cmode = MV88E6XXX_PORT_STS_CMODE_2500BASEX;
339 break; 339 break;
340 case PHY_INTERFACE_MODE_XGMII: 340 case PHY_INTERFACE_MODE_XGMII:
341 case PHY_INTERFACE_MODE_XAUI:
341 cmode = MV88E6XXX_PORT_STS_CMODE_XAUI; 342 cmode = MV88E6XXX_PORT_STS_CMODE_XAUI;
342 break; 343 break;
343 case PHY_INTERFACE_MODE_RXAUI: 344 case PHY_INTERFACE_MODE_RXAUI:
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
index 57e796870595..105fdb958cef 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
@@ -50,7 +50,7 @@
50#define AQ_CFG_PCI_FUNC_MSIX_IRQS 9U 50#define AQ_CFG_PCI_FUNC_MSIX_IRQS 9U
51#define AQ_CFG_PCI_FUNC_PORTS 2U 51#define AQ_CFG_PCI_FUNC_PORTS 2U
52 52
53#define AQ_CFG_SERVICE_TIMER_INTERVAL (2 * HZ) 53#define AQ_CFG_SERVICE_TIMER_INTERVAL (1 * HZ)
54#define AQ_CFG_POLLING_TIMER_INTERVAL ((unsigned int)(2 * HZ)) 54#define AQ_CFG_POLLING_TIMER_INTERVAL ((unsigned int)(2 * HZ))
55 55
56#define AQ_CFG_SKB_FRAGS_MAX 32U 56#define AQ_CFG_SKB_FRAGS_MAX 32U
@@ -80,6 +80,7 @@
80#define AQ_CFG_DRV_VERSION __stringify(NIC_MAJOR_DRIVER_VERSION)"."\ 80#define AQ_CFG_DRV_VERSION __stringify(NIC_MAJOR_DRIVER_VERSION)"."\
81 __stringify(NIC_MINOR_DRIVER_VERSION)"."\ 81 __stringify(NIC_MINOR_DRIVER_VERSION)"."\
82 __stringify(NIC_BUILD_DRIVER_VERSION)"."\ 82 __stringify(NIC_BUILD_DRIVER_VERSION)"."\
83 __stringify(NIC_REVISION_DRIVER_VERSION) 83 __stringify(NIC_REVISION_DRIVER_VERSION) \
84 AQ_CFG_DRV_VERSION_SUFFIX
84 85
85#endif /* AQ_CFG_H */ 86#endif /* AQ_CFG_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index 70efb7467bf3..f2d8063a2cef 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -66,14 +66,14 @@ static const char aq_ethtool_stat_names[][ETH_GSTRING_LEN] = {
66 "OutUCast", 66 "OutUCast",
67 "OutMCast", 67 "OutMCast",
68 "OutBCast", 68 "OutBCast",
69 "InUCastOctects", 69 "InUCastOctets",
70 "OutUCastOctects", 70 "OutUCastOctets",
71 "InMCastOctects", 71 "InMCastOctets",
72 "OutMCastOctects", 72 "OutMCastOctets",
73 "InBCastOctects", 73 "InBCastOctets",
74 "OutBCastOctects", 74 "OutBCastOctets",
75 "InOctects", 75 "InOctets",
76 "OutOctects", 76 "OutOctets",
77 "InPacketsDma", 77 "InPacketsDma",
78 "OutPacketsDma", 78 "OutPacketsDma",
79 "InOctetsDma", 79 "InOctetsDma",
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index 0207927dc8a6..b3825de6cdfb 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -46,6 +46,28 @@ struct aq_hw_link_status_s {
46 unsigned int mbps; 46 unsigned int mbps;
47}; 47};
48 48
49struct aq_stats_s {
50 u64 uprc;
51 u64 mprc;
52 u64 bprc;
53 u64 erpt;
54 u64 uptc;
55 u64 mptc;
56 u64 bptc;
57 u64 erpr;
58 u64 mbtc;
59 u64 bbtc;
60 u64 mbrc;
61 u64 bbrc;
62 u64 ubrc;
63 u64 ubtc;
64 u64 dpc;
65 u64 dma_pkt_rc;
66 u64 dma_pkt_tc;
67 u64 dma_oct_rc;
68 u64 dma_oct_tc;
69};
70
49#define AQ_HW_IRQ_INVALID 0U 71#define AQ_HW_IRQ_INVALID 0U
50#define AQ_HW_IRQ_LEGACY 1U 72#define AQ_HW_IRQ_LEGACY 1U
51#define AQ_HW_IRQ_MSI 2U 73#define AQ_HW_IRQ_MSI 2U
@@ -85,7 +107,9 @@ struct aq_hw_ops {
85 void (*destroy)(struct aq_hw_s *self); 107 void (*destroy)(struct aq_hw_s *self);
86 108
87 int (*get_hw_caps)(struct aq_hw_s *self, 109 int (*get_hw_caps)(struct aq_hw_s *self,
88 struct aq_hw_caps_s *aq_hw_caps); 110 struct aq_hw_caps_s *aq_hw_caps,
111 unsigned short device,
112 unsigned short subsystem_device);
89 113
90 int (*hw_ring_tx_xmit)(struct aq_hw_s *self, struct aq_ring_s *aq_ring, 114 int (*hw_ring_tx_xmit)(struct aq_hw_s *self, struct aq_ring_s *aq_ring,
91 unsigned int frags); 115 unsigned int frags);
@@ -164,8 +188,7 @@ struct aq_hw_ops {
164 188
165 int (*hw_update_stats)(struct aq_hw_s *self); 189 int (*hw_update_stats)(struct aq_hw_s *self);
166 190
167 int (*hw_get_hw_stats)(struct aq_hw_s *self, u64 *data, 191 struct aq_stats_s *(*hw_get_hw_stats)(struct aq_hw_s *self);
168 unsigned int *p_count);
169 192
170 int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version); 193 int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version);
171 194
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 78dfb2ab78ce..75a894a9251c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -37,6 +37,8 @@ static unsigned int aq_itr_rx;
37module_param_named(aq_itr_rx, aq_itr_rx, uint, 0644); 37module_param_named(aq_itr_rx, aq_itr_rx, uint, 0644);
38MODULE_PARM_DESC(aq_itr_rx, "RX interrupt throttle rate"); 38MODULE_PARM_DESC(aq_itr_rx, "RX interrupt throttle rate");
39 39
40static void aq_nic_update_ndev_stats(struct aq_nic_s *self);
41
40static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues) 42static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues)
41{ 43{
42 struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; 44 struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
@@ -166,11 +168,8 @@ static int aq_nic_update_link_status(struct aq_nic_s *self)
166static void aq_nic_service_timer_cb(struct timer_list *t) 168static void aq_nic_service_timer_cb(struct timer_list *t)
167{ 169{
168 struct aq_nic_s *self = from_timer(self, t, service_timer); 170 struct aq_nic_s *self = from_timer(self, t, service_timer);
169 struct net_device *ndev = aq_nic_get_ndev(self); 171 int ctimer = AQ_CFG_SERVICE_TIMER_INTERVAL;
170 int err = 0; 172 int err = 0;
171 unsigned int i = 0U;
172 struct aq_ring_stats_rx_s stats_rx;
173 struct aq_ring_stats_tx_s stats_tx;
174 173
175 if (aq_utils_obj_test(&self->header.flags, AQ_NIC_FLAGS_IS_NOT_READY)) 174 if (aq_utils_obj_test(&self->header.flags, AQ_NIC_FLAGS_IS_NOT_READY))
176 goto err_exit; 175 goto err_exit;
@@ -182,23 +181,14 @@ static void aq_nic_service_timer_cb(struct timer_list *t)
182 if (self->aq_hw_ops.hw_update_stats) 181 if (self->aq_hw_ops.hw_update_stats)
183 self->aq_hw_ops.hw_update_stats(self->aq_hw); 182 self->aq_hw_ops.hw_update_stats(self->aq_hw);
184 183
185 memset(&stats_rx, 0U, sizeof(struct aq_ring_stats_rx_s)); 184 aq_nic_update_ndev_stats(self);
186 memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s));
187 for (i = AQ_DIMOF(self->aq_vec); i--;) {
188 if (self->aq_vec[i])
189 aq_vec_add_stats(self->aq_vec[i], &stats_rx, &stats_tx);
190 }
191 185
192 ndev->stats.rx_packets = stats_rx.packets; 186 /* If no link - use faster timer rate to detect link up asap */
193 ndev->stats.rx_bytes = stats_rx.bytes; 187 if (!netif_carrier_ok(self->ndev))
194 ndev->stats.rx_errors = stats_rx.errors; 188 ctimer = max(ctimer / 2, 1);
195 ndev->stats.tx_packets = stats_tx.packets;
196 ndev->stats.tx_bytes = stats_tx.bytes;
197 ndev->stats.tx_errors = stats_tx.errors;
198 189
199err_exit: 190err_exit:
200 mod_timer(&self->service_timer, 191 mod_timer(&self->service_timer, jiffies + ctimer);
201 jiffies + AQ_CFG_SERVICE_TIMER_INTERVAL);
202} 192}
203 193
204static void aq_nic_polling_timer_cb(struct timer_list *t) 194static void aq_nic_polling_timer_cb(struct timer_list *t)
@@ -222,7 +212,7 @@ static struct net_device *aq_nic_ndev_alloc(void)
222 212
223struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops, 213struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
224 const struct ethtool_ops *et_ops, 214 const struct ethtool_ops *et_ops,
225 struct device *dev, 215 struct pci_dev *pdev,
226 struct aq_pci_func_s *aq_pci_func, 216 struct aq_pci_func_s *aq_pci_func,
227 unsigned int port, 217 unsigned int port,
228 const struct aq_hw_ops *aq_hw_ops) 218 const struct aq_hw_ops *aq_hw_ops)
@@ -242,7 +232,7 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
242 ndev->netdev_ops = ndev_ops; 232 ndev->netdev_ops = ndev_ops;
243 ndev->ethtool_ops = et_ops; 233 ndev->ethtool_ops = et_ops;
244 234
245 SET_NETDEV_DEV(ndev, dev); 235 SET_NETDEV_DEV(ndev, &pdev->dev);
246 236
247 ndev->if_port = port; 237 ndev->if_port = port;
248 self->ndev = ndev; 238 self->ndev = ndev;
@@ -254,7 +244,8 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
254 244
255 self->aq_hw = self->aq_hw_ops.create(aq_pci_func, self->port, 245 self->aq_hw = self->aq_hw_ops.create(aq_pci_func, self->port,
256 &self->aq_hw_ops); 246 &self->aq_hw_ops);
257 err = self->aq_hw_ops.get_hw_caps(self->aq_hw, &self->aq_hw_caps); 247 err = self->aq_hw_ops.get_hw_caps(self->aq_hw, &self->aq_hw_caps,
248 pdev->device, pdev->subsystem_device);
258 if (err < 0) 249 if (err < 0)
259 goto err_exit; 250 goto err_exit;
260 251
@@ -749,16 +740,40 @@ int aq_nic_get_regs_count(struct aq_nic_s *self)
749 740
750void aq_nic_get_stats(struct aq_nic_s *self, u64 *data) 741void aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
751{ 742{
752 struct aq_vec_s *aq_vec = NULL;
753 unsigned int i = 0U; 743 unsigned int i = 0U;
754 unsigned int count = 0U; 744 unsigned int count = 0U;
755 int err = 0; 745 struct aq_vec_s *aq_vec = NULL;
746 struct aq_stats_s *stats = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw);
756 747
757 err = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw, data, &count); 748 if (!stats)
758 if (err < 0)
759 goto err_exit; 749 goto err_exit;
760 750
761 data += count; 751 data[i] = stats->uprc + stats->mprc + stats->bprc;
752 data[++i] = stats->uprc;
753 data[++i] = stats->mprc;
754 data[++i] = stats->bprc;
755 data[++i] = stats->erpt;
756 data[++i] = stats->uptc + stats->mptc + stats->bptc;
757 data[++i] = stats->uptc;
758 data[++i] = stats->mptc;
759 data[++i] = stats->bptc;
760 data[++i] = stats->ubrc;
761 data[++i] = stats->ubtc;
762 data[++i] = stats->mbrc;
763 data[++i] = stats->mbtc;
764 data[++i] = stats->bbrc;
765 data[++i] = stats->bbtc;
766 data[++i] = stats->ubrc + stats->mbrc + stats->bbrc;
767 data[++i] = stats->ubtc + stats->mbtc + stats->bbtc;
768 data[++i] = stats->dma_pkt_rc;
769 data[++i] = stats->dma_pkt_tc;
770 data[++i] = stats->dma_oct_rc;
771 data[++i] = stats->dma_oct_tc;
772 data[++i] = stats->dpc;
773
774 i++;
775
776 data += i;
762 count = 0U; 777 count = 0U;
763 778
764 for (i = 0U, aq_vec = self->aq_vec[0]; 779 for (i = 0U, aq_vec = self->aq_vec[0];
@@ -768,7 +783,20 @@ void aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
768 } 783 }
769 784
770err_exit:; 785err_exit:;
771 (void)err; 786}
787
788static void aq_nic_update_ndev_stats(struct aq_nic_s *self)
789{
790 struct net_device *ndev = self->ndev;
791 struct aq_stats_s *stats = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw);
792
793 ndev->stats.rx_packets = stats->uprc + stats->mprc + stats->bprc;
794 ndev->stats.rx_bytes = stats->ubrc + stats->mbrc + stats->bbrc;
795 ndev->stats.rx_errors = stats->erpr;
796 ndev->stats.tx_packets = stats->uptc + stats->mptc + stats->bptc;
797 ndev->stats.tx_bytes = stats->ubtc + stats->mbtc + stats->bbtc;
798 ndev->stats.tx_errors = stats->erpt;
799 ndev->stats.multicast = stats->mprc;
772} 800}
773 801
774void aq_nic_get_link_ksettings(struct aq_nic_s *self, 802void aq_nic_get_link_ksettings(struct aq_nic_s *self,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index 4309983acdd6..3c9f8db03d5f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -71,7 +71,7 @@ struct aq_nic_cfg_s {
71 71
72struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops, 72struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
73 const struct ethtool_ops *et_ops, 73 const struct ethtool_ops *et_ops,
74 struct device *dev, 74 struct pci_dev *pdev,
75 struct aq_pci_func_s *aq_pci_func, 75 struct aq_pci_func_s *aq_pci_func,
76 unsigned int port, 76 unsigned int port,
77 const struct aq_hw_ops *aq_hw_ops); 77 const struct aq_hw_ops *aq_hw_ops);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index cadaa646c89f..58c29d04b186 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -51,7 +51,8 @@ struct aq_pci_func_s *aq_pci_func_alloc(struct aq_hw_ops *aq_hw_ops,
51 pci_set_drvdata(pdev, self); 51 pci_set_drvdata(pdev, self);
52 self->pdev = pdev; 52 self->pdev = pdev;
53 53
54 err = aq_hw_ops->get_hw_caps(NULL, &self->aq_hw_caps); 54 err = aq_hw_ops->get_hw_caps(NULL, &self->aq_hw_caps, pdev->device,
55 pdev->subsystem_device);
55 if (err < 0) 56 if (err < 0)
56 goto err_exit; 57 goto err_exit;
57 58
@@ -59,7 +60,7 @@ struct aq_pci_func_s *aq_pci_func_alloc(struct aq_hw_ops *aq_hw_ops,
59 60
60 for (port = 0; port < self->ports; ++port) { 61 for (port = 0; port < self->ports; ++port) {
61 struct aq_nic_s *aq_nic = aq_nic_alloc_cold(ndev_ops, eth_ops, 62 struct aq_nic_s *aq_nic = aq_nic_alloc_cold(ndev_ops, eth_ops,
62 &pdev->dev, self, 63 pdev, self,
63 port, aq_hw_ops); 64 port, aq_hw_ops);
64 65
65 if (!aq_nic) { 66 if (!aq_nic) {
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 07b3c49a16a4..f18dce14c93c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -18,9 +18,20 @@
18#include "hw_atl_a0_internal.h" 18#include "hw_atl_a0_internal.h"
19 19
20static int hw_atl_a0_get_hw_caps(struct aq_hw_s *self, 20static int hw_atl_a0_get_hw_caps(struct aq_hw_s *self,
21 struct aq_hw_caps_s *aq_hw_caps) 21 struct aq_hw_caps_s *aq_hw_caps,
22 unsigned short device,
23 unsigned short subsystem_device)
22{ 24{
23 memcpy(aq_hw_caps, &hw_atl_a0_hw_caps_, sizeof(*aq_hw_caps)); 25 memcpy(aq_hw_caps, &hw_atl_a0_hw_caps_, sizeof(*aq_hw_caps));
26
27 if (device == HW_ATL_DEVICE_ID_D108 && subsystem_device == 0x0001)
28 aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_10G;
29
30 if (device == HW_ATL_DEVICE_ID_D109 && subsystem_device == 0x0001) {
31 aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_10G;
32 aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_5G;
33 }
34
24 return 0; 35 return 0;
25} 36}
26 37
@@ -333,6 +344,10 @@ static int hw_atl_a0_hw_init(struct aq_hw_s *self,
333 hw_atl_a0_hw_rss_set(self, &aq_nic_cfg->aq_rss); 344 hw_atl_a0_hw_rss_set(self, &aq_nic_cfg->aq_rss);
334 hw_atl_a0_hw_rss_hash_set(self, &aq_nic_cfg->aq_rss); 345 hw_atl_a0_hw_rss_hash_set(self, &aq_nic_cfg->aq_rss);
335 346
347 /* Reset link status and read out initial hardware counters */
348 self->aq_link_status.mbps = 0;
349 hw_atl_utils_update_stats(self);
350
336 err = aq_hw_err_from_flags(self); 351 err = aq_hw_err_from_flags(self);
337 if (err < 0) 352 if (err < 0)
338 goto err_exit; 353 goto err_exit;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index ec68c20efcbd..e4a22ce7bf09 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -16,11 +16,23 @@
16#include "hw_atl_utils.h" 16#include "hw_atl_utils.h"
17#include "hw_atl_llh.h" 17#include "hw_atl_llh.h"
18#include "hw_atl_b0_internal.h" 18#include "hw_atl_b0_internal.h"
19#include "hw_atl_llh_internal.h"
19 20
20static int hw_atl_b0_get_hw_caps(struct aq_hw_s *self, 21static int hw_atl_b0_get_hw_caps(struct aq_hw_s *self,
21 struct aq_hw_caps_s *aq_hw_caps) 22 struct aq_hw_caps_s *aq_hw_caps,
23 unsigned short device,
24 unsigned short subsystem_device)
22{ 25{
23 memcpy(aq_hw_caps, &hw_atl_b0_hw_caps_, sizeof(*aq_hw_caps)); 26 memcpy(aq_hw_caps, &hw_atl_b0_hw_caps_, sizeof(*aq_hw_caps));
27
28 if (device == HW_ATL_DEVICE_ID_D108 && subsystem_device == 0x0001)
29 aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_10G;
30
31 if (device == HW_ATL_DEVICE_ID_D109 && subsystem_device == 0x0001) {
32 aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_10G;
33 aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_5G;
34 }
35
24 return 0; 36 return 0;
25} 37}
26 38
@@ -357,6 +369,7 @@ static int hw_atl_b0_hw_init(struct aq_hw_s *self,
357 }; 369 };
358 370
359 int err = 0; 371 int err = 0;
372 u32 val;
360 373
361 self->aq_nic_cfg = aq_nic_cfg; 374 self->aq_nic_cfg = aq_nic_cfg;
362 375
@@ -374,6 +387,20 @@ static int hw_atl_b0_hw_init(struct aq_hw_s *self,
374 hw_atl_b0_hw_rss_set(self, &aq_nic_cfg->aq_rss); 387 hw_atl_b0_hw_rss_set(self, &aq_nic_cfg->aq_rss);
375 hw_atl_b0_hw_rss_hash_set(self, &aq_nic_cfg->aq_rss); 388 hw_atl_b0_hw_rss_hash_set(self, &aq_nic_cfg->aq_rss);
376 389
390 /* Force limit MRRS on RDM/TDM to 2K */
391 val = aq_hw_read_reg(self, pci_reg_control6_adr);
392 aq_hw_write_reg(self, pci_reg_control6_adr, (val & ~0x707) | 0x404);
393
394 /* TX DMA total request limit. B0 hardware is not capable to
395 * handle more than (8K-MRRS) incoming DMA data.
396 * Value 24 in 256byte units
397 */
398 aq_hw_write_reg(self, tx_dma_total_req_limit_adr, 24);
399
400 /* Reset link status and read out initial hardware counters */
401 self->aq_link_status.mbps = 0;
402 hw_atl_utils_update_stats(self);
403
377 err = aq_hw_err_from_flags(self); 404 err = aq_hw_err_from_flags(self);
378 if (err < 0) 405 if (err < 0)
379 goto err_exit; 406 goto err_exit;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
index 5527fc0e5942..93450ec930e8 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
@@ -2343,6 +2343,9 @@
2343#define tx_dma_desc_base_addrmsw_adr(descriptor) \ 2343#define tx_dma_desc_base_addrmsw_adr(descriptor) \
2344 (0x00007c04u + (descriptor) * 0x40) 2344 (0x00007c04u + (descriptor) * 0x40)
2345 2345
2346/* tx dma total request limit */
2347#define tx_dma_total_req_limit_adr 0x00007b20u
2348
2346/* tx interrupt moderation control register definitions 2349/* tx interrupt moderation control register definitions
2347 * Preprocessor definitions for TX Interrupt Moderation Control Register 2350 * Preprocessor definitions for TX Interrupt Moderation Control Register
2348 * Base Address: 0x00008980 2351 * Base Address: 0x00008980
@@ -2369,6 +2372,9 @@
2369/* default value of bitfield reg_res_dsbl */ 2372/* default value of bitfield reg_res_dsbl */
2370#define pci_reg_res_dsbl_default 0x1 2373#define pci_reg_res_dsbl_default 0x1
2371 2374
2375/* PCI core control register */
2376#define pci_reg_control6_adr 0x1014u
2377
2372/* global microprocessor scratch pad definitions */ 2378/* global microprocessor scratch pad definitions */
2373#define glb_cpu_scratch_scp_adr(scratch_scp) (0x00000300u + (scratch_scp) * 0x4) 2379#define glb_cpu_scratch_scp_adr(scratch_scp) (0x00000300u + (scratch_scp) * 0x4)
2374 2380
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 1fe016fc4bc7..f2ce12ed4218 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -503,73 +503,43 @@ int hw_atl_utils_update_stats(struct aq_hw_s *self)
503 struct hw_atl_s *hw_self = PHAL_ATLANTIC; 503 struct hw_atl_s *hw_self = PHAL_ATLANTIC;
504 struct hw_aq_atl_utils_mbox mbox; 504 struct hw_aq_atl_utils_mbox mbox;
505 505
506 if (!self->aq_link_status.mbps)
507 return 0;
508
509 hw_atl_utils_mpi_read_stats(self, &mbox); 506 hw_atl_utils_mpi_read_stats(self, &mbox);
510 507
511#define AQ_SDELTA(_N_) (hw_self->curr_stats._N_ += \ 508#define AQ_SDELTA(_N_) (hw_self->curr_stats._N_ += \
512 mbox.stats._N_ - hw_self->last_stats._N_) 509 mbox.stats._N_ - hw_self->last_stats._N_)
513 510 if (self->aq_link_status.mbps) {
514 AQ_SDELTA(uprc); 511 AQ_SDELTA(uprc);
515 AQ_SDELTA(mprc); 512 AQ_SDELTA(mprc);
516 AQ_SDELTA(bprc); 513 AQ_SDELTA(bprc);
517 AQ_SDELTA(erpt); 514 AQ_SDELTA(erpt);
518 515
519 AQ_SDELTA(uptc); 516 AQ_SDELTA(uptc);
520 AQ_SDELTA(mptc); 517 AQ_SDELTA(mptc);
521 AQ_SDELTA(bptc); 518 AQ_SDELTA(bptc);
522 AQ_SDELTA(erpr); 519 AQ_SDELTA(erpr);
523 520
524 AQ_SDELTA(ubrc); 521 AQ_SDELTA(ubrc);
525 AQ_SDELTA(ubtc); 522 AQ_SDELTA(ubtc);
526 AQ_SDELTA(mbrc); 523 AQ_SDELTA(mbrc);
527 AQ_SDELTA(mbtc); 524 AQ_SDELTA(mbtc);
528 AQ_SDELTA(bbrc); 525 AQ_SDELTA(bbrc);
529 AQ_SDELTA(bbtc); 526 AQ_SDELTA(bbtc);
530 AQ_SDELTA(dpc); 527 AQ_SDELTA(dpc);
531 528 }
532#undef AQ_SDELTA 529#undef AQ_SDELTA
530 hw_self->curr_stats.dma_pkt_rc = stats_rx_dma_good_pkt_counterlsw_get(self);
531 hw_self->curr_stats.dma_pkt_tc = stats_tx_dma_good_pkt_counterlsw_get(self);
532 hw_self->curr_stats.dma_oct_rc = stats_rx_dma_good_octet_counterlsw_get(self);
533 hw_self->curr_stats.dma_oct_tc = stats_tx_dma_good_octet_counterlsw_get(self);
533 534
534 memcpy(&hw_self->last_stats, &mbox.stats, sizeof(mbox.stats)); 535 memcpy(&hw_self->last_stats, &mbox.stats, sizeof(mbox.stats));
535 536
536 return 0; 537 return 0;
537} 538}
538 539
539int hw_atl_utils_get_hw_stats(struct aq_hw_s *self, 540struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self)
540 u64 *data, unsigned int *p_count)
541{ 541{
542 struct hw_atl_s *hw_self = PHAL_ATLANTIC; 542 return &PHAL_ATLANTIC->curr_stats;
543 struct hw_atl_stats_s *stats = &hw_self->curr_stats;
544 int i = 0;
545
546 data[i] = stats->uprc + stats->mprc + stats->bprc;
547 data[++i] = stats->uprc;
548 data[++i] = stats->mprc;
549 data[++i] = stats->bprc;
550 data[++i] = stats->erpt;
551 data[++i] = stats->uptc + stats->mptc + stats->bptc;
552 data[++i] = stats->uptc;
553 data[++i] = stats->mptc;
554 data[++i] = stats->bptc;
555 data[++i] = stats->ubrc;
556 data[++i] = stats->ubtc;
557 data[++i] = stats->mbrc;
558 data[++i] = stats->mbtc;
559 data[++i] = stats->bbrc;
560 data[++i] = stats->bbtc;
561 data[++i] = stats->ubrc + stats->mbrc + stats->bbrc;
562 data[++i] = stats->ubtc + stats->mbtc + stats->bbtc;
563 data[++i] = stats_rx_dma_good_pkt_counterlsw_get(self);
564 data[++i] = stats_tx_dma_good_pkt_counterlsw_get(self);
565 data[++i] = stats_rx_dma_good_octet_counterlsw_get(self);
566 data[++i] = stats_tx_dma_good_octet_counterlsw_get(self);
567 data[++i] = stats->dpc;
568
569 if (p_count)
570 *p_count = ++i;
571
572 return 0;
573} 543}
574 544
575static const u32 hw_atl_utils_hw_mac_regs[] = { 545static const u32 hw_atl_utils_hw_mac_regs[] = {
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index c99cc690e425..21aeca6908d3 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -129,7 +129,7 @@ struct __packed hw_aq_atl_utils_mbox {
129struct __packed hw_atl_s { 129struct __packed hw_atl_s {
130 struct aq_hw_s base; 130 struct aq_hw_s base;
131 struct hw_atl_stats_s last_stats; 131 struct hw_atl_stats_s last_stats;
132 struct hw_atl_stats_s curr_stats; 132 struct aq_stats_s curr_stats;
133 u64 speed; 133 u64 speed;
134 unsigned int chip_features; 134 unsigned int chip_features;
135 u32 fw_ver_actual; 135 u32 fw_ver_actual;
@@ -207,8 +207,6 @@ int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version);
207 207
208int hw_atl_utils_update_stats(struct aq_hw_s *self); 208int hw_atl_utils_update_stats(struct aq_hw_s *self);
209 209
210int hw_atl_utils_get_hw_stats(struct aq_hw_s *self, 210struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self);
211 u64 *data,
212 unsigned int *p_count);
213 211
214#endif /* HW_ATL_UTILS_H */ 212#endif /* HW_ATL_UTILS_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/ver.h b/drivers/net/ethernet/aquantia/atlantic/ver.h
index 0de858d215c2..9009f2651e70 100644
--- a/drivers/net/ethernet/aquantia/atlantic/ver.h
+++ b/drivers/net/ethernet/aquantia/atlantic/ver.h
@@ -11,8 +11,10 @@
11#define VER_H 11#define VER_H
12 12
13#define NIC_MAJOR_DRIVER_VERSION 1 13#define NIC_MAJOR_DRIVER_VERSION 1
14#define NIC_MINOR_DRIVER_VERSION 5 14#define NIC_MINOR_DRIVER_VERSION 6
15#define NIC_BUILD_DRIVER_VERSION 345 15#define NIC_BUILD_DRIVER_VERSION 13
16#define NIC_REVISION_DRIVER_VERSION 0 16#define NIC_REVISION_DRIVER_VERSION 0
17 17
18#define AQ_CFG_DRV_VERSION_SUFFIX "-kern"
19
18#endif /* VER_H */ 20#endif /* VER_H */
diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h
index 3c63b16d485f..d9efbc8d783b 100644
--- a/drivers/net/ethernet/arc/emac.h
+++ b/drivers/net/ethernet/arc/emac.h
@@ -159,6 +159,8 @@ struct arc_emac_priv {
159 unsigned int link; 159 unsigned int link;
160 unsigned int duplex; 160 unsigned int duplex;
161 unsigned int speed; 161 unsigned int speed;
162
163 unsigned int rx_missed_errors;
162}; 164};
163 165
164/** 166/**
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index 3241af1ce718..bd277b0dc615 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -26,6 +26,8 @@
26 26
27#include "emac.h" 27#include "emac.h"
28 28
29static void arc_emac_restart(struct net_device *ndev);
30
29/** 31/**
30 * arc_emac_tx_avail - Return the number of available slots in the tx ring. 32 * arc_emac_tx_avail - Return the number of available slots in the tx ring.
31 * @priv: Pointer to ARC EMAC private data structure. 33 * @priv: Pointer to ARC EMAC private data structure.
@@ -210,39 +212,48 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
210 continue; 212 continue;
211 } 213 }
212 214
213 pktlen = info & LEN_MASK; 215 /* Prepare the BD for next cycle. netif_receive_skb()
214 stats->rx_packets++; 216 * only if new skb was allocated and mapped to avoid holes
215 stats->rx_bytes += pktlen; 217 * in the RX fifo.
216 skb = rx_buff->skb; 218 */
217 skb_put(skb, pktlen); 219 skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
218 skb->dev = ndev; 220 if (unlikely(!skb)) {
219 skb->protocol = eth_type_trans(skb, ndev); 221 if (net_ratelimit())
220 222 netdev_err(ndev, "cannot allocate skb\n");
221 dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr), 223 /* Return ownership to EMAC */
222 dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE); 224 rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
223
224 /* Prepare the BD for next cycle */
225 rx_buff->skb = netdev_alloc_skb_ip_align(ndev,
226 EMAC_BUFFER_SIZE);
227 if (unlikely(!rx_buff->skb)) {
228 stats->rx_errors++; 225 stats->rx_errors++;
229 /* Because receive_skb is below, increment rx_dropped */
230 stats->rx_dropped++; 226 stats->rx_dropped++;
231 continue; 227 continue;
232 } 228 }
233 229
234 /* receive_skb only if new skb was allocated to avoid holes */ 230 addr = dma_map_single(&ndev->dev, (void *)skb->data,
235 netif_receive_skb(skb);
236
237 addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data,
238 EMAC_BUFFER_SIZE, DMA_FROM_DEVICE); 231 EMAC_BUFFER_SIZE, DMA_FROM_DEVICE);
239 if (dma_mapping_error(&ndev->dev, addr)) { 232 if (dma_mapping_error(&ndev->dev, addr)) {
240 if (net_ratelimit()) 233 if (net_ratelimit())
241 netdev_err(ndev, "cannot dma map\n"); 234 netdev_err(ndev, "cannot map dma buffer\n");
242 dev_kfree_skb(rx_buff->skb); 235 dev_kfree_skb(skb);
236 /* Return ownership to EMAC */
237 rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
243 stats->rx_errors++; 238 stats->rx_errors++;
239 stats->rx_dropped++;
244 continue; 240 continue;
245 } 241 }
242
243 /* unmap previosly mapped skb */
244 dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
245 dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
246
247 pktlen = info & LEN_MASK;
248 stats->rx_packets++;
249 stats->rx_bytes += pktlen;
250 skb_put(rx_buff->skb, pktlen);
251 rx_buff->skb->dev = ndev;
252 rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev);
253
254 netif_receive_skb(rx_buff->skb);
255
256 rx_buff->skb = skb;
246 dma_unmap_addr_set(rx_buff, addr, addr); 257 dma_unmap_addr_set(rx_buff, addr, addr);
247 dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE); 258 dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE);
248 259
@@ -259,6 +270,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
259} 270}
260 271
261/** 272/**
273 * arc_emac_rx_miss_handle - handle R_MISS register
274 * @ndev: Pointer to the net_device structure.
275 */
276static void arc_emac_rx_miss_handle(struct net_device *ndev)
277{
278 struct arc_emac_priv *priv = netdev_priv(ndev);
279 struct net_device_stats *stats = &ndev->stats;
280 unsigned int miss;
281
282 miss = arc_reg_get(priv, R_MISS);
283 if (miss) {
284 stats->rx_errors += miss;
285 stats->rx_missed_errors += miss;
286 priv->rx_missed_errors += miss;
287 }
288}
289
290/**
291 * arc_emac_rx_stall_check - check RX stall
292 * @ndev: Pointer to the net_device structure.
293 * @budget: How many BDs requested to process on 1 call.
294 * @work_done: How many BDs processed
295 *
296 * Under certain conditions EMAC stop reception of incoming packets and
297 * continuously increment R_MISS register instead of saving data into
298 * provided buffer. This function detect that condition and restart
299 * EMAC.
300 */
301static void arc_emac_rx_stall_check(struct net_device *ndev,
302 int budget, unsigned int work_done)
303{
304 struct arc_emac_priv *priv = netdev_priv(ndev);
305 struct arc_emac_bd *rxbd;
306
307 if (work_done)
308 priv->rx_missed_errors = 0;
309
310 if (priv->rx_missed_errors && budget) {
311 rxbd = &priv->rxbd[priv->last_rx_bd];
312 if (le32_to_cpu(rxbd->info) & FOR_EMAC) {
313 arc_emac_restart(ndev);
314 priv->rx_missed_errors = 0;
315 }
316 }
317}
318
319/**
262 * arc_emac_poll - NAPI poll handler. 320 * arc_emac_poll - NAPI poll handler.
263 * @napi: Pointer to napi_struct structure. 321 * @napi: Pointer to napi_struct structure.
264 * @budget: How many BDs to process on 1 call. 322 * @budget: How many BDs to process on 1 call.
@@ -272,6 +330,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
272 unsigned int work_done; 330 unsigned int work_done;
273 331
274 arc_emac_tx_clean(ndev); 332 arc_emac_tx_clean(ndev);
333 arc_emac_rx_miss_handle(ndev);
275 334
276 work_done = arc_emac_rx(ndev, budget); 335 work_done = arc_emac_rx(ndev, budget);
277 if (work_done < budget) { 336 if (work_done < budget) {
@@ -279,6 +338,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
279 arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK); 338 arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK);
280 } 339 }
281 340
341 arc_emac_rx_stall_check(ndev, budget, work_done);
342
282 return work_done; 343 return work_done;
283} 344}
284 345
@@ -320,6 +381,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance)
320 if (status & MSER_MASK) { 381 if (status & MSER_MASK) {
321 stats->rx_missed_errors += 0x100; 382 stats->rx_missed_errors += 0x100;
322 stats->rx_errors += 0x100; 383 stats->rx_errors += 0x100;
384 priv->rx_missed_errors += 0x100;
385 napi_schedule(&priv->napi);
323 } 386 }
324 387
325 if (status & RXCR_MASK) { 388 if (status & RXCR_MASK) {
@@ -732,6 +795,63 @@ static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
732} 795}
733 796
734 797
798/**
799 * arc_emac_restart - Restart EMAC
800 * @ndev: Pointer to net_device structure.
801 *
802 * This function do hardware reset of EMAC in order to restore
803 * network packets reception.
804 */
805static void arc_emac_restart(struct net_device *ndev)
806{
807 struct arc_emac_priv *priv = netdev_priv(ndev);
808 struct net_device_stats *stats = &ndev->stats;
809 int i;
810
811 if (net_ratelimit())
812 netdev_warn(ndev, "restarting stalled EMAC\n");
813
814 netif_stop_queue(ndev);
815
816 /* Disable interrupts */
817 arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
818
819 /* Disable EMAC */
820 arc_reg_clr(priv, R_CTRL, EN_MASK);
821
822 /* Return the sk_buff to system */
823 arc_free_tx_queue(ndev);
824
825 /* Clean Tx BD's */
826 priv->txbd_curr = 0;
827 priv->txbd_dirty = 0;
828 memset(priv->txbd, 0, TX_RING_SZ);
829
830 for (i = 0; i < RX_BD_NUM; i++) {
831 struct arc_emac_bd *rxbd = &priv->rxbd[i];
832 unsigned int info = le32_to_cpu(rxbd->info);
833
834 if (!(info & FOR_EMAC)) {
835 stats->rx_errors++;
836 stats->rx_dropped++;
837 }
838 /* Return ownership to EMAC */
839 rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
840 }
841 priv->last_rx_bd = 0;
842
843 /* Make sure info is visible to EMAC before enable */
844 wmb();
845
846 /* Enable interrupts */
847 arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
848
849 /* Enable EMAC */
850 arc_reg_or(priv, R_CTRL, EN_MASK);
851
852 netif_start_queue(ndev);
853}
854
735static const struct net_device_ops arc_emac_netdev_ops = { 855static const struct net_device_ops arc_emac_netdev_ops = {
736 .ndo_open = arc_emac_open, 856 .ndo_open = arc_emac_open,
737 .ndo_stop = arc_emac_stop, 857 .ndo_stop = arc_emac_stop,
diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c
index c6163874e4e7..16f9bee992fe 100644
--- a/drivers/net/ethernet/arc/emac_rockchip.c
+++ b/drivers/net/ethernet/arc/emac_rockchip.c
@@ -199,9 +199,11 @@ static int emac_rockchip_probe(struct platform_device *pdev)
199 199
200 /* RMII interface needs always a rate of 50MHz */ 200 /* RMII interface needs always a rate of 50MHz */
201 err = clk_set_rate(priv->refclk, 50000000); 201 err = clk_set_rate(priv->refclk, 50000000);
202 if (err) 202 if (err) {
203 dev_err(dev, 203 dev_err(dev,
204 "failed to change reference clock rate (%d)\n", err); 204 "failed to change reference clock rate (%d)\n", err);
205 goto out_regulator_disable;
206 }
205 207
206 if (priv->soc_data->need_div_macclk) { 208 if (priv->soc_data->need_div_macclk) {
207 priv->macclk = devm_clk_get(dev, "macclk"); 209 priv->macclk = devm_clk_get(dev, "macclk");
@@ -230,12 +232,14 @@ static int emac_rockchip_probe(struct platform_device *pdev)
230 err = arc_emac_probe(ndev, interface); 232 err = arc_emac_probe(ndev, interface);
231 if (err) { 233 if (err) {
232 dev_err(dev, "failed to probe arc emac (%d)\n", err); 234 dev_err(dev, "failed to probe arc emac (%d)\n", err);
233 goto out_regulator_disable; 235 goto out_clk_disable_macclk;
234 } 236 }
235 237
236 return 0; 238 return 0;
239
237out_clk_disable_macclk: 240out_clk_disable_macclk:
238 clk_disable_unprepare(priv->macclk); 241 if (priv->soc_data->need_div_macclk)
242 clk_disable_unprepare(priv->macclk);
239out_regulator_disable: 243out_regulator_disable:
240 if (priv->regulator) 244 if (priv->regulator)
241 regulator_disable(priv->regulator); 245 regulator_disable(priv->regulator);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 4c739d5355d2..8ae269ec17a1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3030,7 +3030,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
3030 3030
3031 del_timer_sync(&bp->timer); 3031 del_timer_sync(&bp->timer);
3032 3032
3033 if (IS_PF(bp)) { 3033 if (IS_PF(bp) && !BP_NOMCP(bp)) {
3034 /* Set ALWAYS_ALIVE bit in shmem */ 3034 /* Set ALWAYS_ALIVE bit in shmem */
3035 bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE; 3035 bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
3036 bnx2x_drv_pulse(bp); 3036 bnx2x_drv_pulse(bp);
@@ -3116,7 +3116,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
3116 bp->cnic_loaded = false; 3116 bp->cnic_loaded = false;
3117 3117
3118 /* Clear driver version indication in shmem */ 3118 /* Clear driver version indication in shmem */
3119 if (IS_PF(bp)) 3119 if (IS_PF(bp) && !BP_NOMCP(bp))
3120 bnx2x_update_mng_version(bp); 3120 bnx2x_update_mng_version(bp);
3121 3121
3122 /* Check if there are pending parity attentions. If there are - set 3122 /* Check if there are pending parity attentions. If there are - set
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 91e2a7560b48..ddd5d3ebd201 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -9578,6 +9578,15 @@ static int bnx2x_init_shmem(struct bnx2x *bp)
9578 9578
9579 do { 9579 do {
9580 bp->common.shmem_base = REG_RD(bp, MISC_REG_SHARED_MEM_ADDR); 9580 bp->common.shmem_base = REG_RD(bp, MISC_REG_SHARED_MEM_ADDR);
9581
9582 /* If we read all 0xFFs, means we are in PCI error state and
9583 * should bail out to avoid crashes on adapter's FW reads.
9584 */
9585 if (bp->common.shmem_base == 0xFFFFFFFF) {
9586 bp->flags |= NO_MCP_FLAG;
9587 return -ENODEV;
9588 }
9589
9581 if (bp->common.shmem_base) { 9590 if (bp->common.shmem_base) {
9582 val = SHMEM_RD(bp, validity_map[BP_PORT(bp)]); 9591 val = SHMEM_RD(bp, validity_map[BP_PORT(bp)]);
9583 if (val & SHR_MEM_VALIDITY_MB) 9592 if (val & SHR_MEM_VALIDITY_MB)
@@ -14320,7 +14329,10 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
14320 BNX2X_ERR("IO slot reset --> driver unload\n"); 14329 BNX2X_ERR("IO slot reset --> driver unload\n");
14321 14330
14322 /* MCP should have been reset; Need to wait for validity */ 14331 /* MCP should have been reset; Need to wait for validity */
14323 bnx2x_init_shmem(bp); 14332 if (bnx2x_init_shmem(bp)) {
14333 rtnl_unlock();
14334 return PCI_ERS_RESULT_DISCONNECT;
14335 }
14324 14336
14325 if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) { 14337 if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) {
14326 u32 v; 14338 u32 v;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index de51c2177d03..8995cfefbfcf 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -4,11 +4,13 @@
4 * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com) 4 * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
5 * Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik@pobox.com) 5 * Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik@pobox.com)
6 * Copyright (C) 2004 Sun Microsystems Inc. 6 * Copyright (C) 2004 Sun Microsystems Inc.
7 * Copyright (C) 2005-2014 Broadcom Corporation. 7 * Copyright (C) 2005-2016 Broadcom Corporation.
8 * Copyright (C) 2016-2017 Broadcom Limited.
8 * 9 *
9 * Firmware is: 10 * Firmware is:
10 * Derived from proprietary unpublished source code, 11 * Derived from proprietary unpublished source code,
11 * Copyright (C) 2000-2003 Broadcom Corporation. 12 * Copyright (C) 2000-2016 Broadcom Corporation.
13 * Copyright (C) 2016-2017 Broadcom Ltd.
12 * 14 *
13 * Permission is hereby granted for the distribution of this firmware 15 * Permission is hereby granted for the distribution of this firmware
14 * data in hexadecimal or equivalent format, provided this copyright 16 * data in hexadecimal or equivalent format, provided this copyright
@@ -10052,6 +10054,16 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy)
10052 10054
10053 tw32(GRC_MODE, tp->grc_mode | val); 10055 tw32(GRC_MODE, tp->grc_mode | val);
10054 10056
10057 /* On one of the AMD platform, MRRS is restricted to 4000 because of
10058 * south bridge limitation. As a workaround, Driver is setting MRRS
10059 * to 2048 instead of default 4096.
10060 */
10061 if (tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL &&
10062 tp->pdev->subsystem_device == TG3PCI_SUBDEVICE_ID_DELL_5762) {
10063 val = tr32(TG3PCI_DEV_STATUS_CTRL) & ~MAX_READ_REQ_MASK;
10064 tw32(TG3PCI_DEV_STATUS_CTRL, val | MAX_READ_REQ_SIZE_2048);
10065 }
10066
10055 /* Setup the timer prescalar register. Clock is always 66Mhz. */ 10067 /* Setup the timer prescalar register. Clock is always 66Mhz. */
10056 val = tr32(GRC_MISC_CFG); 10068 val = tr32(GRC_MISC_CFG);
10057 val &= ~0xff; 10069 val &= ~0xff;
@@ -14225,7 +14237,10 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
14225 /* Reset PHY, otherwise the read DMA engine will be in a mode that 14237 /* Reset PHY, otherwise the read DMA engine will be in a mode that
14226 * breaks all requests to 256 bytes. 14238 * breaks all requests to 256 bytes.
14227 */ 14239 */
14228 if (tg3_asic_rev(tp) == ASIC_REV_57766) 14240 if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
14241 tg3_asic_rev(tp) == ASIC_REV_5717 ||
14242 tg3_asic_rev(tp) == ASIC_REV_5719 ||
14243 tg3_asic_rev(tp) == ASIC_REV_5720)
14229 reset_phy = true; 14244 reset_phy = true;
14230 14245
14231 err = tg3_restart_hw(tp, reset_phy); 14246 err = tg3_restart_hw(tp, reset_phy);
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index c2d02d02d1e6..1f0271fa7c74 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -5,7 +5,8 @@
5 * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com) 5 * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
6 * Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com) 6 * Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com)
7 * Copyright (C) 2004 Sun Microsystems Inc. 7 * Copyright (C) 2004 Sun Microsystems Inc.
8 * Copyright (C) 2007-2014 Broadcom Corporation. 8 * Copyright (C) 2007-2016 Broadcom Corporation.
9 * Copyright (C) 2016-2017 Broadcom Limited.
9 */ 10 */
10 11
11#ifndef _T3_H 12#ifndef _T3_H
@@ -96,6 +97,7 @@
96#define TG3PCI_SUBDEVICE_ID_DELL_JAGUAR 0x0106 97#define TG3PCI_SUBDEVICE_ID_DELL_JAGUAR 0x0106
97#define TG3PCI_SUBDEVICE_ID_DELL_MERLOT 0x0109 98#define TG3PCI_SUBDEVICE_ID_DELL_MERLOT 0x0109
98#define TG3PCI_SUBDEVICE_ID_DELL_SLIM_MERLOT 0x010a 99#define TG3PCI_SUBDEVICE_ID_DELL_SLIM_MERLOT 0x010a
100#define TG3PCI_SUBDEVICE_ID_DELL_5762 0x07f0
99#define TG3PCI_SUBVENDOR_ID_COMPAQ PCI_VENDOR_ID_COMPAQ 101#define TG3PCI_SUBVENDOR_ID_COMPAQ PCI_VENDOR_ID_COMPAQ
100#define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE 0x007c 102#define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE 0x007c
101#define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE_2 0x009a 103#define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE_2 0x009a
@@ -281,6 +283,9 @@
281#define TG3PCI_STD_RING_PROD_IDX 0x00000098 /* 64-bit */ 283#define TG3PCI_STD_RING_PROD_IDX 0x00000098 /* 64-bit */
282#define TG3PCI_RCV_RET_RING_CON_IDX 0x000000a0 /* 64-bit */ 284#define TG3PCI_RCV_RET_RING_CON_IDX 0x000000a0 /* 64-bit */
283/* 0xa8 --> 0xb8 unused */ 285/* 0xa8 --> 0xb8 unused */
286#define TG3PCI_DEV_STATUS_CTRL 0x000000b4
287#define MAX_READ_REQ_SIZE_2048 0x00004000
288#define MAX_READ_REQ_MASK 0x00007000
284#define TG3PCI_DUAL_MAC_CTRL 0x000000b8 289#define TG3PCI_DUAL_MAC_CTRL 0x000000b8
285#define DUAL_MAC_CTRL_CH_MASK 0x00000003 290#define DUAL_MAC_CTRL_CH_MASK 0x00000003
286#define DUAL_MAC_CTRL_ID 0x00000004 291#define DUAL_MAC_CTRL_ID 0x00000004
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 610573855213..8184d2fca9be 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -818,6 +818,12 @@ static void fec_enet_bd_init(struct net_device *dev)
818 for (i = 0; i < txq->bd.ring_size; i++) { 818 for (i = 0; i < txq->bd.ring_size; i++) {
819 /* Initialize the BD for every fragment in the page. */ 819 /* Initialize the BD for every fragment in the page. */
820 bdp->cbd_sc = cpu_to_fec16(0); 820 bdp->cbd_sc = cpu_to_fec16(0);
821 if (bdp->cbd_bufaddr &&
822 !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
823 dma_unmap_single(&fep->pdev->dev,
824 fec32_to_cpu(bdp->cbd_bufaddr),
825 fec16_to_cpu(bdp->cbd_datlen),
826 DMA_TO_DEVICE);
821 if (txq->tx_skbuff[i]) { 827 if (txq->tx_skbuff[i]) {
822 dev_kfree_skb_any(txq->tx_skbuff[i]); 828 dev_kfree_skb_any(txq->tx_skbuff[i]);
823 txq->tx_skbuff[i] = NULL; 829 txq->tx_skbuff[i] = NULL;
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index bc93b69cfd1e..a539263cd79c 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct mvneta_port *pp)
1214 val &= ~MVNETA_GMAC0_PORT_ENABLE; 1214 val &= ~MVNETA_GMAC0_PORT_ENABLE;
1215 mvreg_write(pp, MVNETA_GMAC_CTRL_0, val); 1215 mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
1216 1216
1217 pp->link = 0;
1218 pp->duplex = -1;
1219 pp->speed = 0;
1220
1217 udelay(200); 1221 udelay(200);
1218} 1222}
1219 1223
@@ -1958,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
1958 1962
1959 if (!mvneta_rxq_desc_is_first_last(rx_status) || 1963 if (!mvneta_rxq_desc_is_first_last(rx_status) ||
1960 (rx_status & MVNETA_RXD_ERR_SUMMARY)) { 1964 (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
1965 mvneta_rx_error(pp, rx_desc);
1961err_drop_frame: 1966err_drop_frame:
1962 dev->stats.rx_errors++; 1967 dev->stats.rx_errors++;
1963 mvneta_rx_error(pp, rx_desc);
1964 /* leave the descriptor untouched */ 1968 /* leave the descriptor untouched */
1965 continue; 1969 continue;
1966 } 1970 }
@@ -3011,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp)
3011{ 3015{
3012 int queue; 3016 int queue;
3013 3017
3014 for (queue = 0; queue < txq_number; queue++) 3018 for (queue = 0; queue < rxq_number; queue++)
3015 mvneta_rxq_deinit(pp, &pp->rxqs[queue]); 3019 mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
3016} 3020}
3017 3021
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index 6e423f098a60..31efc47c847e 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -4081,7 +4081,6 @@ static void skge_remove(struct pci_dev *pdev)
4081 if (hw->ports > 1) { 4081 if (hw->ports > 1) {
4082 skge_write32(hw, B0_IMSK, 0); 4082 skge_write32(hw, B0_IMSK, 0);
4083 skge_read32(hw, B0_IMSK); 4083 skge_read32(hw, B0_IMSK);
4084 free_irq(pdev->irq, hw);
4085 } 4084 }
4086 spin_unlock_irq(&hw->hw_lock); 4085 spin_unlock_irq(&hw->hw_lock);
4087 4086
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 54adfd967858..fc67e35b253e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1961,11 +1961,12 @@ static int mtk_hw_init(struct mtk_eth *eth)
1961 /* set GE2 TUNE */ 1961 /* set GE2 TUNE */
1962 regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0); 1962 regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);
1963 1963
1964 /* GE1, Force 1000M/FD, FC ON */ 1964 /* Set linkdown as the default for each GMAC. Its own MCR would be set
1965 mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0)); 1965 * up with the more appropriate value when mtk_phy_link_adjust call is
1966 1966 * being invoked.
1967 /* GE2, Force 1000M/FD, FC ON */ 1967 */
1968 mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1)); 1968 for (i = 0; i < MTK_MAC_COUNT; i++)
1969 mtk_w32(eth, 0, MTK_MAC_MCR(i));
1969 1970
1970 /* Indicates CDM to parse the MTK special tag from CPU 1971 /* Indicates CDM to parse the MTK special tag from CPU
1971 * which also is working out for untag packets. 1972 * which also is working out for untag packets.
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index e0eb695318e6..1fa4849a6f56 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -188,7 +188,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
188 struct net_device *dev = mdev->pndev[port]; 188 struct net_device *dev = mdev->pndev[port];
189 struct mlx4_en_priv *priv = netdev_priv(dev); 189 struct mlx4_en_priv *priv = netdev_priv(dev);
190 struct net_device_stats *stats = &dev->stats; 190 struct net_device_stats *stats = &dev->stats;
191 struct mlx4_cmd_mailbox *mailbox; 191 struct mlx4_cmd_mailbox *mailbox, *mailbox_priority;
192 u64 in_mod = reset << 8 | port; 192 u64 in_mod = reset << 8 | port;
193 int err; 193 int err;
194 int i, counter_index; 194 int i, counter_index;
@@ -198,6 +198,13 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
198 mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); 198 mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
199 if (IS_ERR(mailbox)) 199 if (IS_ERR(mailbox))
200 return PTR_ERR(mailbox); 200 return PTR_ERR(mailbox);
201
202 mailbox_priority = mlx4_alloc_cmd_mailbox(mdev->dev);
203 if (IS_ERR(mailbox_priority)) {
204 mlx4_free_cmd_mailbox(mdev->dev, mailbox);
205 return PTR_ERR(mailbox_priority);
206 }
207
201 err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0, 208 err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0,
202 MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, 209 MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B,
203 MLX4_CMD_NATIVE); 210 MLX4_CMD_NATIVE);
@@ -206,6 +213,28 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
206 213
207 mlx4_en_stats = mailbox->buf; 214 mlx4_en_stats = mailbox->buf;
208 215
216 memset(&tmp_counter_stats, 0, sizeof(tmp_counter_stats));
217 counter_index = mlx4_get_default_counter_index(mdev->dev, port);
218 err = mlx4_get_counter_stats(mdev->dev, counter_index,
219 &tmp_counter_stats, reset);
220
221 /* 0xffs indicates invalid value */
222 memset(mailbox_priority->buf, 0xff,
223 sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
224
225 if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) {
226 memset(mailbox_priority->buf, 0,
227 sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
228 err = mlx4_cmd_box(mdev->dev, 0, mailbox_priority->dma,
229 in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL,
230 0, MLX4_CMD_DUMP_ETH_STATS,
231 MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
232 if (err)
233 goto out;
234 }
235
236 flowstats = mailbox_priority->buf;
237
209 spin_lock_bh(&priv->stats_lock); 238 spin_lock_bh(&priv->stats_lock);
210 239
211 mlx4_en_fold_software_stats(dev); 240 mlx4_en_fold_software_stats(dev);
@@ -345,31 +374,6 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
345 priv->pkstats.tx_prio[8][0] = be64_to_cpu(mlx4_en_stats->TTOT_novlan); 374 priv->pkstats.tx_prio[8][0] = be64_to_cpu(mlx4_en_stats->TTOT_novlan);
346 priv->pkstats.tx_prio[8][1] = be64_to_cpu(mlx4_en_stats->TOCT_novlan); 375 priv->pkstats.tx_prio[8][1] = be64_to_cpu(mlx4_en_stats->TOCT_novlan);
347 376
348 spin_unlock_bh(&priv->stats_lock);
349
350 memset(&tmp_counter_stats, 0, sizeof(tmp_counter_stats));
351 counter_index = mlx4_get_default_counter_index(mdev->dev, port);
352 err = mlx4_get_counter_stats(mdev->dev, counter_index,
353 &tmp_counter_stats, reset);
354
355 /* 0xffs indicates invalid value */
356 memset(mailbox->buf, 0xff, sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
357
358 if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) {
359 memset(mailbox->buf, 0,
360 sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
361 err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma,
362 in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL,
363 0, MLX4_CMD_DUMP_ETH_STATS,
364 MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
365 if (err)
366 goto out;
367 }
368
369 flowstats = mailbox->buf;
370
371 spin_lock_bh(&priv->stats_lock);
372
373 if (tmp_counter_stats.counter_mode == 0) { 377 if (tmp_counter_stats.counter_mode == 0) {
374 priv->pf_stats.rx_bytes = be64_to_cpu(tmp_counter_stats.rx_bytes); 378 priv->pf_stats.rx_bytes = be64_to_cpu(tmp_counter_stats.rx_bytes);
375 priv->pf_stats.tx_bytes = be64_to_cpu(tmp_counter_stats.tx_bytes); 379 priv->pf_stats.tx_bytes = be64_to_cpu(tmp_counter_stats.tx_bytes);
@@ -410,6 +414,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
410 414
411out: 415out:
412 mlx4_free_cmd_mailbox(mdev->dev, mailbox); 416 mlx4_free_cmd_mailbox(mdev->dev, mailbox);
417 mlx4_free_cmd_mailbox(mdev->dev, mailbox_priority);
413 return err; 418 return err;
414} 419}
415 420
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
index 88699b181946..946d9db7c8c2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
@@ -185,7 +185,7 @@ void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf)
185 if (priv->mdev->dev->caps.flags & 185 if (priv->mdev->dev->caps.flags &
186 MLX4_DEV_CAP_FLAG_UC_LOOPBACK) { 186 MLX4_DEV_CAP_FLAG_UC_LOOPBACK) {
187 buf[3] = mlx4_en_test_registers(priv); 187 buf[3] = mlx4_en_test_registers(priv);
188 if (priv->port_up) 188 if (priv->port_up && dev->mtu >= MLX4_SELFTEST_LB_MIN_MTU)
189 buf[4] = mlx4_en_test_loopback(priv); 189 buf[4] = mlx4_en_test_loopback(priv);
190 } 190 }
191 191
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 1856e279a7e0..2b72677eccd4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -153,6 +153,9 @@
153#define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN) 153#define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN)
154#define HEADER_COPY_SIZE (128 - NET_IP_ALIGN) 154#define HEADER_COPY_SIZE (128 - NET_IP_ALIGN)
155#define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN) 155#define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN)
156#define PREAMBLE_LEN 8
157#define MLX4_SELFTEST_LB_MIN_MTU (MLX4_LOOPBACK_TEST_PAYLOAD + NET_IP_ALIGN + \
158 ETH_HLEN + PREAMBLE_LEN)
156 159
157#define MLX4_EN_MIN_MTU 46 160#define MLX4_EN_MIN_MTU 46
158/* VLAN_HLEN is added twice,to support skb vlan tagged with multiple 161/* VLAN_HLEN is added twice,to support skb vlan tagged with multiple
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 04304dd894c6..606a0e0beeae 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -611,7 +611,6 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
611 MLX4_MAX_PORTS; 611 MLX4_MAX_PORTS;
612 else 612 else
613 res_alloc->guaranteed[t] = 0; 613 res_alloc->guaranteed[t] = 0;
614 res_alloc->res_free -= res_alloc->guaranteed[t];
615 break; 614 break;
616 default: 615 default:
617 break; 616 break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 1fffdebbc9e8..e9a1fbcc4adf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
362 case MLX5_CMD_OP_QUERY_VPORT_COUNTER: 362 case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
363 case MLX5_CMD_OP_ALLOC_Q_COUNTER: 363 case MLX5_CMD_OP_ALLOC_Q_COUNTER:
364 case MLX5_CMD_OP_QUERY_Q_COUNTER: 364 case MLX5_CMD_OP_QUERY_Q_COUNTER:
365 case MLX5_CMD_OP_SET_RATE_LIMIT: 365 case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
366 case MLX5_CMD_OP_QUERY_RATE_LIMIT: 366 case MLX5_CMD_OP_QUERY_RATE_LIMIT:
367 case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: 367 case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
368 case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: 368 case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
@@ -505,7 +505,7 @@ const char *mlx5_command_str(int command)
505 MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); 505 MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
506 MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); 506 MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
507 MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); 507 MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
508 MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); 508 MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
509 MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); 509 MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
510 MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); 510 MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
511 MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); 511 MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c0872b3284cb..543060c305a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -82,6 +82,9 @@
82 max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req) 82 max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
83#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6) 83#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
84#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8) 84#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
85#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
86 (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
87 MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
85 88
86#define MLX5_MPWRQ_LOG_WQE_SZ 18 89#define MLX5_MPWRQ_LOG_WQE_SZ 18
87#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ 90#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
@@ -590,6 +593,7 @@ struct mlx5e_channel {
590 struct mlx5_core_dev *mdev; 593 struct mlx5_core_dev *mdev;
591 struct hwtstamp_config *tstamp; 594 struct hwtstamp_config *tstamp;
592 int ix; 595 int ix;
596 int cpu;
593}; 597};
594 598
595struct mlx5e_channels { 599struct mlx5e_channels {
@@ -935,8 +939,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
935 u8 cq_period_mode); 939 u8 cq_period_mode);
936void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, 940void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
937 u8 cq_period_mode); 941 u8 cq_period_mode);
938void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, 942void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
939 struct mlx5e_params *params, u8 rq_type); 943 struct mlx5e_params *params,
944 u8 rq_type);
940 945
941static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) 946static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
942{ 947{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index c6d90b6dd80e..9bcf38f4123b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -274,6 +274,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
274static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, 274static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
275 struct ieee_ets *ets) 275 struct ieee_ets *ets)
276{ 276{
277 bool have_ets_tc = false;
277 int bw_sum = 0; 278 int bw_sum = 0;
278 int i; 279 int i;
279 280
@@ -288,11 +289,14 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
288 } 289 }
289 290
290 /* Validate Bandwidth Sum */ 291 /* Validate Bandwidth Sum */
291 for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) 292 for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
292 if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) 293 if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
294 have_ets_tc = true;
293 bw_sum += ets->tc_tx_bw[i]; 295 bw_sum += ets->tc_tx_bw[i];
296 }
297 }
294 298
295 if (bw_sum != 0 && bw_sum != 100) { 299 if (have_ets_tc && bw_sum != 100) {
296 netdev_err(netdev, 300 netdev_err(netdev,
297 "Failed to validate ETS: BW sum is illegal\n"); 301 "Failed to validate ETS: BW sum is illegal\n");
298 return -EINVAL; 302 return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 23425f028405..8f05efa5c829 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1523,8 +1523,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
1523 new_channels.params = priv->channels.params; 1523 new_channels.params = priv->channels.params;
1524 MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); 1524 MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
1525 1525
1526 mlx5e_set_rq_type_params(priv->mdev, &new_channels.params, 1526 new_channels.params.mpwqe_log_stride_sz =
1527 new_channels.params.rq_wq_type); 1527 MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val);
1528 new_channels.params.mpwqe_log_num_strides =
1529 MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz;
1528 1530
1529 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { 1531 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
1530 priv->channels.params = new_channels.params; 1532 priv->channels.params = new_channels.params;
@@ -1536,6 +1538,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
1536 return err; 1538 return err;
1537 1539
1538 mlx5e_switch_priv_channels(priv, &new_channels, NULL); 1540 mlx5e_switch_priv_channels(priv, &new_channels, NULL);
1541 mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
1542 MLX5E_GET_PFLAG(&priv->channels.params,
1543 MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
1544
1539 return 0; 1545 return 0;
1540} 1546}
1541 1547
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d2b057a3e512..d9d8227f195f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -71,11 +71,6 @@ struct mlx5e_channel_param {
71 struct mlx5e_cq_param icosq_cq; 71 struct mlx5e_cq_param icosq_cq;
72}; 72};
73 73
74static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
75{
76 return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
77}
78
79static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) 74static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
80{ 75{
81 return MLX5_CAP_GEN(mdev, striding_rq) && 76 return MLX5_CAP_GEN(mdev, striding_rq) &&
@@ -83,8 +78,8 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
83 MLX5_CAP_ETH(mdev, reg_umr_sq); 78 MLX5_CAP_ETH(mdev, reg_umr_sq);
84} 79}
85 80
86void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, 81void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
87 struct mlx5e_params *params, u8 rq_type) 82 struct mlx5e_params *params, u8 rq_type)
88{ 83{
89 params->rq_wq_type = rq_type; 84 params->rq_wq_type = rq_type;
90 params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; 85 params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@ -93,10 +88,8 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
93 params->log_rq_size = is_kdump_kernel() ? 88 params->log_rq_size = is_kdump_kernel() ?
94 MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW : 89 MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
95 MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; 90 MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
96 params->mpwqe_log_stride_sz = 91 params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
97 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ? 92 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
98 MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) :
99 MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
100 params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - 93 params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
101 params->mpwqe_log_stride_sz; 94 params->mpwqe_log_stride_sz;
102 break; 95 break;
@@ -120,13 +113,14 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
120 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); 113 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
121} 114}
122 115
123static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) 116static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
117 struct mlx5e_params *params)
124{ 118{
125 u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && 119 u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
126 !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? 120 !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
127 MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : 121 MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
128 MLX5_WQ_TYPE_LINKED_LIST; 122 MLX5_WQ_TYPE_LINKED_LIST;
129 mlx5e_set_rq_type_params(mdev, params, rq_type); 123 mlx5e_init_rq_type_params(mdev, params, rq_type);
130} 124}
131 125
132static void mlx5e_update_carrier(struct mlx5e_priv *priv) 126static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@ -444,17 +438,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
444 int wq_sz = mlx5_wq_ll_get_size(&rq->wq); 438 int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
445 int mtt_sz = mlx5e_get_wqe_mtt_sz(); 439 int mtt_sz = mlx5e_get_wqe_mtt_sz();
446 int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; 440 int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
447 int node = mlx5e_get_node(c->priv, c->ix);
448 int i; 441 int i;
449 442
450 rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), 443 rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
451 GFP_KERNEL, node); 444 GFP_KERNEL, cpu_to_node(c->cpu));
452 if (!rq->mpwqe.info) 445 if (!rq->mpwqe.info)
453 goto err_out; 446 goto err_out;
454 447
455 /* We allocate more than mtt_sz as we will align the pointer */ 448 /* We allocate more than mtt_sz as we will align the pointer */
456 rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, 449 rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
457 GFP_KERNEL, node); 450 cpu_to_node(c->cpu));
458 if (unlikely(!rq->mpwqe.mtt_no_align)) 451 if (unlikely(!rq->mpwqe.mtt_no_align))
459 goto err_free_wqe_info; 452 goto err_free_wqe_info;
460 453
@@ -562,7 +555,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
562 int err; 555 int err;
563 int i; 556 int i;
564 557
565 rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 558 rqp->wq.db_numa_node = cpu_to_node(c->cpu);
566 559
567 err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq, 560 err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
568 &rq->wq_ctrl); 561 &rq->wq_ctrl);
@@ -629,8 +622,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
629 default: /* MLX5_WQ_TYPE_LINKED_LIST */ 622 default: /* MLX5_WQ_TYPE_LINKED_LIST */
630 rq->wqe.frag_info = 623 rq->wqe.frag_info =
631 kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info), 624 kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
632 GFP_KERNEL, 625 GFP_KERNEL, cpu_to_node(c->cpu));
633 mlx5e_get_node(c->priv, c->ix));
634 if (!rq->wqe.frag_info) { 626 if (!rq->wqe.frag_info) {
635 err = -ENOMEM; 627 err = -ENOMEM;
636 goto err_rq_wq_destroy; 628 goto err_rq_wq_destroy;
@@ -1000,13 +992,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
1000 sq->uar_map = mdev->mlx5e_res.bfreg.map; 992 sq->uar_map = mdev->mlx5e_res.bfreg.map;
1001 sq->min_inline_mode = params->tx_min_inline_mode; 993 sq->min_inline_mode = params->tx_min_inline_mode;
1002 994
1003 param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 995 param->wq.db_numa_node = cpu_to_node(c->cpu);
1004 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); 996 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
1005 if (err) 997 if (err)
1006 return err; 998 return err;
1007 sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; 999 sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
1008 1000
1009 err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix)); 1001 err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
1010 if (err) 1002 if (err)
1011 goto err_sq_wq_destroy; 1003 goto err_sq_wq_destroy;
1012 1004
@@ -1053,13 +1045,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
1053 sq->channel = c; 1045 sq->channel = c;
1054 sq->uar_map = mdev->mlx5e_res.bfreg.map; 1046 sq->uar_map = mdev->mlx5e_res.bfreg.map;
1055 1047
1056 param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 1048 param->wq.db_numa_node = cpu_to_node(c->cpu);
1057 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); 1049 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
1058 if (err) 1050 if (err)
1059 return err; 1051 return err;
1060 sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; 1052 sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
1061 1053
1062 err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix)); 1054 err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
1063 if (err) 1055 if (err)
1064 goto err_sq_wq_destroy; 1056 goto err_sq_wq_destroy;
1065 1057
@@ -1126,13 +1118,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
1126 if (MLX5_IPSEC_DEV(c->priv->mdev)) 1118 if (MLX5_IPSEC_DEV(c->priv->mdev))
1127 set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); 1119 set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
1128 1120
1129 param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 1121 param->wq.db_numa_node = cpu_to_node(c->cpu);
1130 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); 1122 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
1131 if (err) 1123 if (err)
1132 return err; 1124 return err;
1133 sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; 1125 sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
1134 1126
1135 err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix)); 1127 err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
1136 if (err) 1128 if (err)
1137 goto err_sq_wq_destroy; 1129 goto err_sq_wq_destroy;
1138 1130
@@ -1504,8 +1496,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
1504 struct mlx5_core_dev *mdev = c->priv->mdev; 1496 struct mlx5_core_dev *mdev = c->priv->mdev;
1505 int err; 1497 int err;
1506 1498
1507 param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix); 1499 param->wq.buf_numa_node = cpu_to_node(c->cpu);
1508 param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 1500 param->wq.db_numa_node = cpu_to_node(c->cpu);
1509 param->eq_ix = c->ix; 1501 param->eq_ix = c->ix;
1510 1502
1511 err = mlx5e_alloc_cq_common(mdev, param, cq); 1503 err = mlx5e_alloc_cq_common(mdev, param, cq);
@@ -1604,6 +1596,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
1604 mlx5e_free_cq(cq); 1596 mlx5e_free_cq(cq);
1605} 1597}
1606 1598
1599static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
1600{
1601 return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
1602}
1603
1607static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, 1604static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1608 struct mlx5e_params *params, 1605 struct mlx5e_params *params,
1609 struct mlx5e_channel_param *cparam) 1606 struct mlx5e_channel_param *cparam)
@@ -1752,12 +1749,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1752{ 1749{
1753 struct mlx5e_cq_moder icocq_moder = {0, 0}; 1750 struct mlx5e_cq_moder icocq_moder = {0, 0};
1754 struct net_device *netdev = priv->netdev; 1751 struct net_device *netdev = priv->netdev;
1752 int cpu = mlx5e_get_cpu(priv, ix);
1755 struct mlx5e_channel *c; 1753 struct mlx5e_channel *c;
1756 unsigned int irq; 1754 unsigned int irq;
1757 int err; 1755 int err;
1758 int eqn; 1756 int eqn;
1759 1757
1760 c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix)); 1758 c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
1761 if (!c) 1759 if (!c)
1762 return -ENOMEM; 1760 return -ENOMEM;
1763 1761
@@ -1765,6 +1763,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1765 c->mdev = priv->mdev; 1763 c->mdev = priv->mdev;
1766 c->tstamp = &priv->tstamp; 1764 c->tstamp = &priv->tstamp;
1767 c->ix = ix; 1765 c->ix = ix;
1766 c->cpu = cpu;
1768 c->pdev = &priv->mdev->pdev->dev; 1767 c->pdev = &priv->mdev->pdev->dev;
1769 c->netdev = priv->netdev; 1768 c->netdev = priv->netdev;
1770 c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); 1769 c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
@@ -1853,8 +1852,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
1853 for (tc = 0; tc < c->num_tc; tc++) 1852 for (tc = 0; tc < c->num_tc; tc++)
1854 mlx5e_activate_txqsq(&c->sq[tc]); 1853 mlx5e_activate_txqsq(&c->sq[tc]);
1855 mlx5e_activate_rq(&c->rq); 1854 mlx5e_activate_rq(&c->rq);
1856 netif_set_xps_queue(c->netdev, 1855 netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
1857 mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
1858} 1856}
1859 1857
1860static void mlx5e_deactivate_channel(struct mlx5e_channel *c) 1858static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@ -3679,6 +3677,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
3679 struct sk_buff *skb, 3677 struct sk_buff *skb,
3680 netdev_features_t features) 3678 netdev_features_t features)
3681{ 3679{
3680 unsigned int offset = 0;
3682 struct udphdr *udph; 3681 struct udphdr *udph;
3683 u8 proto; 3682 u8 proto;
3684 u16 port; 3683 u16 port;
@@ -3688,7 +3687,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
3688 proto = ip_hdr(skb)->protocol; 3687 proto = ip_hdr(skb)->protocol;
3689 break; 3688 break;
3690 case htons(ETH_P_IPV6): 3689 case htons(ETH_P_IPV6):
3691 proto = ipv6_hdr(skb)->nexthdr; 3690 proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
3692 break; 3691 break;
3693 default: 3692 default:
3694 goto out; 3693 goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 60771865c99c..e7e7cef2bde4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -466,7 +466,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
466 break; 466 break;
467 case MLX5_EVENT_TYPE_CQ_ERROR: 467 case MLX5_EVENT_TYPE_CQ_ERROR:
468 cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; 468 cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
469 mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n", 469 mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
470 cqn, eqe->data.cq_err.syndrome); 470 cqn, eqe->data.cq_err.syndrome);
471 mlx5_cq_event(dev, cqn, eqe->type); 471 mlx5_cq_event(dev, cqn, eqe->type);
472 break; 472 break;
@@ -775,7 +775,7 @@ err1:
775 return err; 775 return err;
776} 776}
777 777
778int mlx5_stop_eqs(struct mlx5_core_dev *dev) 778void mlx5_stop_eqs(struct mlx5_core_dev *dev)
779{ 779{
780 struct mlx5_eq_table *table = &dev->priv.eq_table; 780 struct mlx5_eq_table *table = &dev->priv.eq_table;
781 int err; 781 int err;
@@ -784,22 +784,26 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
784 if (MLX5_CAP_GEN(dev, pg)) { 784 if (MLX5_CAP_GEN(dev, pg)) {
785 err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq); 785 err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
786 if (err) 786 if (err)
787 return err; 787 mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
788 err);
788 } 789 }
789#endif 790#endif
790 791
791 err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); 792 err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
792 if (err) 793 if (err)
793 return err; 794 mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
795 err);
794 796
795 mlx5_destroy_unmap_eq(dev, &table->async_eq); 797 err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
798 if (err)
799 mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
800 err);
796 mlx5_cmd_use_polling(dev); 801 mlx5_cmd_use_polling(dev);
797 802
798 err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); 803 err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
799 if (err) 804 if (err)
800 mlx5_cmd_use_events(dev); 805 mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
801 806 err);
802 return err;
803} 807}
804 808
805int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, 809int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
index 3c11d6e2160a..14962969c5ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
@@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
66 u8 actual_size; 66 u8 actual_size;
67 int err; 67 int err;
68 68
69 if (!size)
70 return -EINVAL;
71
69 if (!fdev->mdev) 72 if (!fdev->mdev)
70 return -ENOTCONN; 73 return -ENOTCONN;
71 74
@@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
95 u8 actual_size; 98 u8 actual_size;
96 int err; 99 int err;
97 100
101 if (!size)
102 return -EINVAL;
103
98 if (!fdev->mdev) 104 if (!fdev->mdev)
99 return -ENOTCONN; 105 return -ENOTCONN;
100 106
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index c70fd663a633..dfaad9ecb2b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -174,6 +174,8 @@ static void del_hw_fte(struct fs_node *node);
174static void del_sw_flow_table(struct fs_node *node); 174static void del_sw_flow_table(struct fs_node *node);
175static void del_sw_flow_group(struct fs_node *node); 175static void del_sw_flow_group(struct fs_node *node);
176static void del_sw_fte(struct fs_node *node); 176static void del_sw_fte(struct fs_node *node);
177static void del_sw_prio(struct fs_node *node);
178static void del_sw_ns(struct fs_node *node);
177/* Delete rule (destination) is special case that 179/* Delete rule (destination) is special case that
178 * requires to lock the FTE for all the deletion process. 180 * requires to lock the FTE for all the deletion process.
179 */ 181 */
@@ -408,6 +410,16 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
408 return NULL; 410 return NULL;
409} 411}
410 412
413static void del_sw_ns(struct fs_node *node)
414{
415 kfree(node);
416}
417
418static void del_sw_prio(struct fs_node *node)
419{
420 kfree(node);
421}
422
411static void del_hw_flow_table(struct fs_node *node) 423static void del_hw_flow_table(struct fs_node *node)
412{ 424{
413 struct mlx5_flow_table *ft; 425 struct mlx5_flow_table *ft;
@@ -2064,7 +2076,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
2064 return ERR_PTR(-ENOMEM); 2076 return ERR_PTR(-ENOMEM);
2065 2077
2066 fs_prio->node.type = FS_TYPE_PRIO; 2078 fs_prio->node.type = FS_TYPE_PRIO;
2067 tree_init_node(&fs_prio->node, NULL, NULL); 2079 tree_init_node(&fs_prio->node, NULL, del_sw_prio);
2068 tree_add_node(&fs_prio->node, &ns->node); 2080 tree_add_node(&fs_prio->node, &ns->node);
2069 fs_prio->num_levels = num_levels; 2081 fs_prio->num_levels = num_levels;
2070 fs_prio->prio = prio; 2082 fs_prio->prio = prio;
@@ -2090,7 +2102,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
2090 return ERR_PTR(-ENOMEM); 2102 return ERR_PTR(-ENOMEM);
2091 2103
2092 fs_init_namespace(ns); 2104 fs_init_namespace(ns);
2093 tree_init_node(&ns->node, NULL, NULL); 2105 tree_init_node(&ns->node, NULL, del_sw_ns);
2094 tree_add_node(&ns->node, &prio->node); 2106 tree_add_node(&ns->node, &prio->node);
2095 list_add_tail(&ns->node.list, &prio->node.children); 2107 list_add_tail(&ns->node.list, &prio->node.children);
2096 2108
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 1a0e797ad001..21d29f7936f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -241,7 +241,7 @@ static void print_health_info(struct mlx5_core_dev *dev)
241 u32 fw; 241 u32 fw;
242 int i; 242 int i;
243 243
244 /* If the syndrom is 0, the device is OK and no need to print buffer */ 244 /* If the syndrome is 0, the device is OK and no need to print buffer */
245 if (!ioread8(&h->synd)) 245 if (!ioread8(&h->synd))
246 return; 246 return;
247 247
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index d2a66dc4adc6..8812d7208e8f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -57,7 +57,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
57 struct mlx5e_params *params) 57 struct mlx5e_params *params)
58{ 58{
59 /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ 59 /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
60 mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST); 60 mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
61 61
62 /* RQ size in ipoib by default is 512 */ 62 /* RQ size in ipoib by default is 512 */
63 params->log_rq_size = is_kdump_kernel() ? 63 params->log_rq_size = is_kdump_kernel() ?
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index f26f97fe4666..582b2f18010a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -137,6 +137,17 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
137} 137}
138EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); 138EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
139 139
140static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
141 bool reset, void *out, int out_size)
142{
143 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
144
145 MLX5_SET(query_cong_statistics_in, in, opcode,
146 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
147 MLX5_SET(query_cong_statistics_in, in, clear, reset);
148 return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
149}
150
140static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev) 151static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
141{ 152{
142 return dev->priv.lag; 153 return dev->priv.lag;
@@ -633,3 +644,48 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
633 /* If bonded, we do not add an IB device for PF1. */ 644 /* If bonded, we do not add an IB device for PF1. */
634 return false; 645 return false;
635} 646}
647
648int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
649 u64 *values,
650 int num_counters,
651 size_t *offsets)
652{
653 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
654 struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
655 struct mlx5_lag *ldev;
656 int num_ports;
657 int ret, i, j;
658 void *out;
659
660 out = kvzalloc(outlen, GFP_KERNEL);
661 if (!out)
662 return -ENOMEM;
663
664 memset(values, 0, sizeof(*values) * num_counters);
665
666 mutex_lock(&lag_mutex);
667 ldev = mlx5_lag_dev_get(dev);
668 if (ldev && mlx5_lag_is_bonded(ldev)) {
669 num_ports = MLX5_MAX_PORTS;
670 mdev[0] = ldev->pf[0].dev;
671 mdev[1] = ldev->pf[1].dev;
672 } else {
673 num_ports = 1;
674 mdev[0] = dev;
675 }
676
677 for (i = 0; i < num_ports; ++i) {
678 ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
679 if (ret)
680 goto unlock;
681
682 for (j = 0; j < num_counters; ++j)
683 values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
684 }
685
686unlock:
687 mutex_unlock(&lag_mutex);
688 kvfree(out);
689 return ret;
690}
691EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 5f323442cc5a..8a89c7e8cd63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
317{ 317{
318 struct mlx5_priv *priv = &dev->priv; 318 struct mlx5_priv *priv = &dev->priv;
319 struct mlx5_eq_table *table = &priv->eq_table; 319 struct mlx5_eq_table *table = &priv->eq_table;
320 struct irq_affinity irqdesc = {
321 .pre_vectors = MLX5_EQ_VEC_COMP_BASE,
322 };
323 int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); 320 int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
324 int nvec; 321 int nvec;
325 322
@@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
333 if (!priv->irq_info) 330 if (!priv->irq_info)
334 goto err_free_msix; 331 goto err_free_msix;
335 332
336 nvec = pci_alloc_irq_vectors_affinity(dev->pdev, 333 nvec = pci_alloc_irq_vectors(dev->pdev,
337 MLX5_EQ_VEC_COMP_BASE + 1, nvec, 334 MLX5_EQ_VEC_COMP_BASE + 1, nvec,
338 PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, 335 PCI_IRQ_MSIX);
339 &irqdesc);
340 if (nvec < 0) 336 if (nvec < 0)
341 return nvec; 337 return nvec;
342 338
@@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
622 return (u64)timer_l | (u64)timer_h1 << 32; 618 return (u64)timer_l | (u64)timer_h1 << 32;
623} 619}
624 620
621static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
622{
623 struct mlx5_priv *priv = &mdev->priv;
624 int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
625
626 if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
627 mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
628 return -ENOMEM;
629 }
630
631 cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
632 priv->irq_info[i].mask);
633
634 if (IS_ENABLED(CONFIG_SMP) &&
635 irq_set_affinity_hint(irq, priv->irq_info[i].mask))
636 mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
637
638 return 0;
639}
640
641static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
642{
643 struct mlx5_priv *priv = &mdev->priv;
644 int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
645
646 irq_set_affinity_hint(irq, NULL);
647 free_cpumask_var(priv->irq_info[i].mask);
648}
649
650static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
651{
652 int err;
653 int i;
654
655 for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
656 err = mlx5_irq_set_affinity_hint(mdev, i);
657 if (err)
658 goto err_out;
659 }
660
661 return 0;
662
663err_out:
664 for (i--; i >= 0; i--)
665 mlx5_irq_clear_affinity_hint(mdev, i);
666
667 return err;
668}
669
670static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
671{
672 int i;
673
674 for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
675 mlx5_irq_clear_affinity_hint(mdev, i);
676}
677
625int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, 678int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
626 unsigned int *irqn) 679 unsigned int *irqn)
627{ 680{
@@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
1097 goto err_stop_eqs; 1150 goto err_stop_eqs;
1098 } 1151 }
1099 1152
1153 err = mlx5_irq_set_affinity_hints(dev);
1154 if (err) {
1155 dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
1156 goto err_affinity_hints;
1157 }
1158
1100 err = mlx5_init_fs(dev); 1159 err = mlx5_init_fs(dev);
1101 if (err) { 1160 if (err) {
1102 dev_err(&pdev->dev, "Failed to init flow steering\n"); 1161 dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1154,6 +1213,9 @@ err_sriov:
1154 mlx5_cleanup_fs(dev); 1213 mlx5_cleanup_fs(dev);
1155 1214
1156err_fs: 1215err_fs:
1216 mlx5_irq_clear_affinity_hints(dev);
1217
1218err_affinity_hints:
1157 free_comp_eqs(dev); 1219 free_comp_eqs(dev);
1158 1220
1159err_stop_eqs: 1221err_stop_eqs:
@@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
1222 1284
1223 mlx5_sriov_detach(dev); 1285 mlx5_sriov_detach(dev);
1224 mlx5_cleanup_fs(dev); 1286 mlx5_cleanup_fs(dev);
1287 mlx5_irq_clear_affinity_hints(dev);
1225 free_comp_eqs(dev); 1288 free_comp_eqs(dev);
1226 mlx5_stop_eqs(dev); 1289 mlx5_stop_eqs(dev);
1227 mlx5_put_uars_page(dev, priv->uar); 1290 mlx5_put_uars_page(dev, priv->uar);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index db9e665ab104..889130edb715 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -213,8 +213,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
213err_cmd: 213err_cmd:
214 memset(din, 0, sizeof(din)); 214 memset(din, 0, sizeof(din));
215 memset(dout, 0, sizeof(dout)); 215 memset(dout, 0, sizeof(dout));
216 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); 216 MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
217 MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); 217 MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
218 mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); 218 mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
219 return err; 219 return err;
220} 220}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index e651e4c02867..d3c33e9eea72 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -125,16 +125,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
125 return ret_entry; 125 return ret_entry;
126} 126}
127 127
128static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev, 128static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
129 u32 rate, u16 index) 129 u32 rate, u16 index)
130{ 130{
131 u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0}; 131 u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0};
132 u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0}; 132 u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
133 133
134 MLX5_SET(set_rate_limit_in, in, opcode, 134 MLX5_SET(set_pp_rate_limit_in, in, opcode,
135 MLX5_CMD_OP_SET_RATE_LIMIT); 135 MLX5_CMD_OP_SET_PP_RATE_LIMIT);
136 MLX5_SET(set_rate_limit_in, in, rate_limit_index, index); 136 MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
137 MLX5_SET(set_rate_limit_in, in, rate_limit, rate); 137 MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate);
138 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 138 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
139} 139}
140 140
@@ -173,7 +173,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
173 entry->refcount++; 173 entry->refcount++;
174 } else { 174 } else {
175 /* new rate limit */ 175 /* new rate limit */
176 err = mlx5_set_rate_limit_cmd(dev, rate, entry->index); 176 err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index);
177 if (err) { 177 if (err) {
178 mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n", 178 mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
179 rate, err); 179 rate, err);
@@ -209,7 +209,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate)
209 entry->refcount--; 209 entry->refcount--;
210 if (!entry->refcount) { 210 if (!entry->refcount) {
211 /* need to remove rate */ 211 /* need to remove rate */
212 mlx5_set_rate_limit_cmd(dev, 0, entry->index); 212 mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index);
213 entry->rate = 0; 213 entry->rate = 0;
214 } 214 }
215 215
@@ -262,8 +262,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
262 /* Clear all configured rates */ 262 /* Clear all configured rates */
263 for (i = 0; i < table->max_size; i++) 263 for (i = 0; i < table->max_size; i++)
264 if (table->rl_entry[i].rate) 264 if (table->rl_entry[i].rate)
265 mlx5_set_rate_limit_cmd(dev, 0, 265 mlx5_set_pp_rate_limit_cmd(dev, 0,
266 table->rl_entry[i].index); 266 table->rl_entry[i].index);
267 267
268 kfree(dev->priv.rl_table.rl_entry); 268 kfree(dev->priv.rl_table.rl_entry);
269} 269}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index 07a9ba6cfc70..2f74953e4561 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port)
71 struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; 71 struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
72 struct mlx5e_vxlan *vxlan; 72 struct mlx5e_vxlan *vxlan;
73 73
74 spin_lock(&vxlan_db->lock); 74 spin_lock_bh(&vxlan_db->lock);
75 vxlan = radix_tree_lookup(&vxlan_db->tree, port); 75 vxlan = radix_tree_lookup(&vxlan_db->tree, port);
76 spin_unlock(&vxlan_db->lock); 76 spin_unlock_bh(&vxlan_db->lock);
77 77
78 return vxlan; 78 return vxlan;
79} 79}
@@ -88,8 +88,12 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
88 struct mlx5e_vxlan *vxlan; 88 struct mlx5e_vxlan *vxlan;
89 int err; 89 int err;
90 90
91 if (mlx5e_vxlan_lookup_port(priv, port)) 91 mutex_lock(&priv->state_lock);
92 vxlan = mlx5e_vxlan_lookup_port(priv, port);
93 if (vxlan) {
94 atomic_inc(&vxlan->refcount);
92 goto free_work; 95 goto free_work;
96 }
93 97
94 if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port)) 98 if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
95 goto free_work; 99 goto free_work;
@@ -99,10 +103,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
99 goto err_delete_port; 103 goto err_delete_port;
100 104
101 vxlan->udp_port = port; 105 vxlan->udp_port = port;
106 atomic_set(&vxlan->refcount, 1);
102 107
103 spin_lock_irq(&vxlan_db->lock); 108 spin_lock_bh(&vxlan_db->lock);
104 err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan); 109 err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
105 spin_unlock_irq(&vxlan_db->lock); 110 spin_unlock_bh(&vxlan_db->lock);
106 if (err) 111 if (err)
107 goto err_free; 112 goto err_free;
108 113
@@ -113,35 +118,39 @@ err_free:
113err_delete_port: 118err_delete_port:
114 mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); 119 mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
115free_work: 120free_work:
121 mutex_unlock(&priv->state_lock);
116 kfree(vxlan_work); 122 kfree(vxlan_work);
117} 123}
118 124
119static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port) 125static void mlx5e_vxlan_del_port(struct work_struct *work)
120{ 126{
127 struct mlx5e_vxlan_work *vxlan_work =
128 container_of(work, struct mlx5e_vxlan_work, work);
129 struct mlx5e_priv *priv = vxlan_work->priv;
121 struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; 130 struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
131 u16 port = vxlan_work->port;
122 struct mlx5e_vxlan *vxlan; 132 struct mlx5e_vxlan *vxlan;
133 bool remove = false;
123 134
124 spin_lock_irq(&vxlan_db->lock); 135 mutex_lock(&priv->state_lock);
125 vxlan = radix_tree_delete(&vxlan_db->tree, port); 136 spin_lock_bh(&vxlan_db->lock);
126 spin_unlock_irq(&vxlan_db->lock); 137 vxlan = radix_tree_lookup(&vxlan_db->tree, port);
127
128 if (!vxlan) 138 if (!vxlan)
129 return; 139 goto out_unlock;
130
131 mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port);
132
133 kfree(vxlan);
134}
135 140
136static void mlx5e_vxlan_del_port(struct work_struct *work) 141 if (atomic_dec_and_test(&vxlan->refcount)) {
137{ 142 radix_tree_delete(&vxlan_db->tree, port);
138 struct mlx5e_vxlan_work *vxlan_work = 143 remove = true;
139 container_of(work, struct mlx5e_vxlan_work, work); 144 }
140 struct mlx5e_priv *priv = vxlan_work->priv;
141 u16 port = vxlan_work->port;
142 145
143 __mlx5e_vxlan_core_del_port(priv, port); 146out_unlock:
147 spin_unlock_bh(&vxlan_db->lock);
144 148
149 if (remove) {
150 mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
151 kfree(vxlan);
152 }
153 mutex_unlock(&priv->state_lock);
145 kfree(vxlan_work); 154 kfree(vxlan_work);
146} 155}
147 156
@@ -171,12 +180,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv)
171 struct mlx5e_vxlan *vxlan; 180 struct mlx5e_vxlan *vxlan;
172 unsigned int port = 0; 181 unsigned int port = 0;
173 182
174 spin_lock_irq(&vxlan_db->lock); 183 /* Lockless since we are the only radix-tree consumers, wq is disabled */
175 while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) { 184 while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
176 port = vxlan->udp_port; 185 port = vxlan->udp_port;
177 spin_unlock_irq(&vxlan_db->lock); 186 radix_tree_delete(&vxlan_db->tree, port);
178 __mlx5e_vxlan_core_del_port(priv, (u16)port); 187 mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
179 spin_lock_irq(&vxlan_db->lock); 188 kfree(vxlan);
180 } 189 }
181 spin_unlock_irq(&vxlan_db->lock);
182} 190}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
index 5def12c048e3..5ef6ae7d568a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
@@ -36,6 +36,7 @@
36#include "en.h" 36#include "en.h"
37 37
38struct mlx5e_vxlan { 38struct mlx5e_vxlan {
39 atomic_t refcount;
39 u16 udp_port; 40 u16 udp_port;
40}; 41};
41 42
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 2d0897b7d860..9bd8d28de152 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4300,6 +4300,7 @@ static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port,
4300 4300
4301static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port) 4301static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port)
4302{ 4302{
4303 u16 vid = 1;
4303 int err; 4304 int err;
4304 4305
4305 err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true); 4306 err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true);
@@ -4312,8 +4313,19 @@ static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port)
4312 true, false); 4313 true, false);
4313 if (err) 4314 if (err)
4314 goto err_port_vlan_set; 4315 goto err_port_vlan_set;
4316
4317 for (; vid <= VLAN_N_VID - 1; vid++) {
4318 err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port,
4319 vid, false);
4320 if (err)
4321 goto err_vid_learning_set;
4322 }
4323
4315 return 0; 4324 return 0;
4316 4325
4326err_vid_learning_set:
4327 for (vid--; vid >= 1; vid--)
4328 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
4317err_port_vlan_set: 4329err_port_vlan_set:
4318 mlxsw_sp_port_stp_set(mlxsw_sp_port, false); 4330 mlxsw_sp_port_stp_set(mlxsw_sp_port, false);
4319err_port_stp_set: 4331err_port_stp_set:
@@ -4323,6 +4335,12 @@ err_port_stp_set:
4323 4335
4324static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port) 4336static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port)
4325{ 4337{
4338 u16 vid;
4339
4340 for (vid = VLAN_N_VID - 1; vid >= 1; vid--)
4341 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port,
4342 vid, true);
4343
4326 mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1, 4344 mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1,
4327 false, false); 4345 false, false);
4328 mlxsw_sp_port_stp_set(mlxsw_sp_port, false); 4346 mlxsw_sp_port_stp_set(mlxsw_sp_port, false);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 72ef4f8025f0..be657b8533f0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2436,25 +2436,16 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2436 rhashtable_destroy(&mlxsw_sp->router->neigh_ht); 2436 rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2437} 2437}
2438 2438
2439static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
2440 const struct mlxsw_sp_rif *rif)
2441{
2442 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2443
2444 mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
2445 rif->rif_index, rif->addr);
2446 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2447}
2448
2449static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, 2439static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2450 struct mlxsw_sp_rif *rif) 2440 struct mlxsw_sp_rif *rif)
2451{ 2441{
2452 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; 2442 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2453 2443
2454 mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
2455 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, 2444 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2456 rif_list_node) 2445 rif_list_node) {
2446 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2457 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); 2447 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2448 }
2458} 2449}
2459 2450
2460enum mlxsw_sp_nexthop_type { 2451enum mlxsw_sp_nexthop_type {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index e379b78e86ef..13190aa09faf 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -82,10 +82,33 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn)
82 return nfp_net_ebpf_capable(nn) ? "BPF" : ""; 82 return nfp_net_ebpf_capable(nn) ? "BPF" : "";
83} 83}
84 84
85static int
86nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
87{
88 int err;
89
90 nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL);
91 if (!nn->app_priv)
92 return -ENOMEM;
93
94 err = nfp_app_nic_vnic_alloc(app, nn, id);
95 if (err)
96 goto err_free_priv;
97
98 return 0;
99err_free_priv:
100 kfree(nn->app_priv);
101 return err;
102}
103
85static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn) 104static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
86{ 105{
106 struct nfp_bpf_vnic *bv = nn->app_priv;
107
87 if (nn->dp.bpf_offload_xdp) 108 if (nn->dp.bpf_offload_xdp)
88 nfp_bpf_xdp_offload(app, nn, NULL); 109 nfp_bpf_xdp_offload(app, nn, NULL);
110 WARN_ON(bv->tc_prog);
111 kfree(bv);
89} 112}
90 113
91static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, 114static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
@@ -93,6 +116,9 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
93{ 116{
94 struct tc_cls_bpf_offload *cls_bpf = type_data; 117 struct tc_cls_bpf_offload *cls_bpf = type_data;
95 struct nfp_net *nn = cb_priv; 118 struct nfp_net *nn = cb_priv;
119 struct bpf_prog *oldprog;
120 struct nfp_bpf_vnic *bv;
121 int err;
96 122
97 if (type != TC_SETUP_CLSBPF || 123 if (type != TC_SETUP_CLSBPF ||
98 !tc_can_offload(nn->dp.netdev) || 124 !tc_can_offload(nn->dp.netdev) ||
@@ -100,8 +126,6 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
100 cls_bpf->common.protocol != htons(ETH_P_ALL) || 126 cls_bpf->common.protocol != htons(ETH_P_ALL) ||
101 cls_bpf->common.chain_index) 127 cls_bpf->common.chain_index)
102 return -EOPNOTSUPP; 128 return -EOPNOTSUPP;
103 if (nn->dp.bpf_offload_xdp)
104 return -EBUSY;
105 129
106 /* Only support TC direct action */ 130 /* Only support TC direct action */
107 if (!cls_bpf->exts_integrated || 131 if (!cls_bpf->exts_integrated ||
@@ -110,16 +134,25 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
110 return -EOPNOTSUPP; 134 return -EOPNOTSUPP;
111 } 135 }
112 136
113 switch (cls_bpf->command) { 137 if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
114 case TC_CLSBPF_REPLACE:
115 return nfp_net_bpf_offload(nn, cls_bpf->prog, true);
116 case TC_CLSBPF_ADD:
117 return nfp_net_bpf_offload(nn, cls_bpf->prog, false);
118 case TC_CLSBPF_DESTROY:
119 return nfp_net_bpf_offload(nn, NULL, true);
120 default:
121 return -EOPNOTSUPP; 138 return -EOPNOTSUPP;
139
140 bv = nn->app_priv;
141 oldprog = cls_bpf->oldprog;
142
143 /* Don't remove if oldprog doesn't match driver's state */
144 if (bv->tc_prog != oldprog) {
145 oldprog = NULL;
146 if (!cls_bpf->prog)
147 return 0;
122 } 148 }
149
150 err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog);
151 if (err)
152 return err;
153
154 bv->tc_prog = cls_bpf->prog;
155 return 0;
123} 156}
124 157
125static int nfp_bpf_setup_tc_block(struct net_device *netdev, 158static int nfp_bpf_setup_tc_block(struct net_device *netdev,
@@ -167,7 +200,7 @@ const struct nfp_app_type app_bpf = {
167 200
168 .extra_cap = nfp_bpf_extra_cap, 201 .extra_cap = nfp_bpf_extra_cap,
169 202
170 .vnic_alloc = nfp_app_nic_vnic_alloc, 203 .vnic_alloc = nfp_bpf_vnic_alloc,
171 .vnic_free = nfp_bpf_vnic_free, 204 .vnic_free = nfp_bpf_vnic_free,
172 205
173 .setup_tc = nfp_bpf_setup_tc, 206 .setup_tc = nfp_bpf_setup_tc,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 082a15f6dfb5..57b6043177a3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -172,6 +172,14 @@ struct nfp_prog {
172 struct list_head insns; 172 struct list_head insns;
173}; 173};
174 174
175/**
176 * struct nfp_bpf_vnic - per-vNIC BPF priv structure
177 * @tc_prog: currently loaded cls_bpf program
178 */
179struct nfp_bpf_vnic {
180 struct bpf_prog *tc_prog;
181};
182
175int nfp_bpf_jit(struct nfp_prog *prog); 183int nfp_bpf_jit(struct nfp_prog *prog);
176 184
177extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops; 185extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
index 18461fcb9815..53dbf1e163a8 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-phy.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
@@ -47,6 +47,7 @@
47#define MDIO_CLK_25_28 7 47#define MDIO_CLK_25_28 7
48 48
49#define MDIO_WAIT_TIMES 1000 49#define MDIO_WAIT_TIMES 1000
50#define MDIO_STATUS_DELAY_TIME 1
50 51
51static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum) 52static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
52{ 53{
@@ -65,7 +66,7 @@ static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
65 66
66 if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg, 67 if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
67 !(reg & (MDIO_START | MDIO_BUSY)), 68 !(reg & (MDIO_START | MDIO_BUSY)),
68 100, MDIO_WAIT_TIMES * 100)) 69 MDIO_STATUS_DELAY_TIME, MDIO_WAIT_TIMES * 100))
69 return -EIO; 70 return -EIO;
70 71
71 return (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK; 72 return (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK;
@@ -88,8 +89,8 @@ static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val)
88 writel(reg, adpt->base + EMAC_MDIO_CTRL); 89 writel(reg, adpt->base + EMAC_MDIO_CTRL);
89 90
90 if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg, 91 if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
91 !(reg & (MDIO_START | MDIO_BUSY)), 100, 92 !(reg & (MDIO_START | MDIO_BUSY)),
92 MDIO_WAIT_TIMES * 100)) 93 MDIO_STATUS_DELAY_TIME, MDIO_WAIT_TIMES * 100))
93 return -EIO; 94 return -EIO;
94 95
95 return 0; 96 return 0;
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 70c92b649b29..38c924bdd32e 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -253,18 +253,18 @@ static int emac_open(struct net_device *netdev)
253 return ret; 253 return ret;
254 } 254 }
255 255
256 ret = emac_mac_up(adpt); 256 ret = adpt->phy.open(adpt);
257 if (ret) { 257 if (ret) {
258 emac_mac_rx_tx_rings_free_all(adpt); 258 emac_mac_rx_tx_rings_free_all(adpt);
259 free_irq(irq->irq, irq); 259 free_irq(irq->irq, irq);
260 return ret; 260 return ret;
261 } 261 }
262 262
263 ret = adpt->phy.open(adpt); 263 ret = emac_mac_up(adpt);
264 if (ret) { 264 if (ret) {
265 emac_mac_down(adpt);
266 emac_mac_rx_tx_rings_free_all(adpt); 265 emac_mac_rx_tx_rings_free_all(adpt);
267 free_irq(irq->irq, irq); 266 free_irq(irq->irq, irq);
267 adpt->phy.close(adpt);
268 return ret; 268 return ret;
269 } 269 }
270 270
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 2b962d349f5f..009780df664b 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -2308,32 +2308,9 @@ static int __maybe_unused ravb_resume(struct device *dev)
2308 struct ravb_private *priv = netdev_priv(ndev); 2308 struct ravb_private *priv = netdev_priv(ndev);
2309 int ret = 0; 2309 int ret = 0;
2310 2310
2311 if (priv->wol_enabled) { 2311 /* If WoL is enabled set reset mode to rearm the WoL logic */
2312 /* Reduce the usecount of the clock to zero and then 2312 if (priv->wol_enabled)
2313 * restore it to its original value. This is done to force
2314 * the clock to be re-enabled which is a workaround
2315 * for renesas-cpg-mssr driver which do not enable clocks
2316 * when resuming from PSCI suspend/resume.
2317 *
2318 * Without this workaround the driver fails to communicate
2319 * with the hardware if WoL was enabled when the system
2320 * entered PSCI suspend. This is due to that if WoL is enabled
2321 * we explicitly keep the clock from being turned off when
2322 * suspending, but in PSCI sleep power is cut so the clock
2323 * is disabled anyhow, the clock driver is not aware of this
2324 * so the clock is not turned back on when resuming.
2325 *
2326 * TODO: once the renesas-cpg-mssr suspend/resume is working
2327 * this clock dance should be removed.
2328 */
2329 clk_disable(priv->clk);
2330 clk_disable(priv->clk);
2331 clk_enable(priv->clk);
2332 clk_enable(priv->clk);
2333
2334 /* Set reset mode to rearm the WoL logic */
2335 ravb_write(ndev, CCC_OPC_RESET, CCC); 2313 ravb_write(ndev, CCC_OPC_RESET, CCC);
2336 }
2337 2314
2338 /* All register have been reset to default values. 2315 /* All register have been reset to default values.
2339 * Restore all registers which where setup at probe time and 2316 * Restore all registers which where setup at probe time and
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index db72d13cebb9..75323000c364 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1892,6 +1892,16 @@ static int sh_eth_phy_init(struct net_device *ndev)
1892 return PTR_ERR(phydev); 1892 return PTR_ERR(phydev);
1893 } 1893 }
1894 1894
1895 /* mask with MAC supported features */
1896 if (mdp->cd->register_type != SH_ETH_REG_GIGABIT) {
1897 int err = phy_set_max_speed(phydev, SPEED_100);
1898 if (err) {
1899 netdev_err(ndev, "failed to limit PHY to 100 Mbit/s\n");
1900 phy_disconnect(phydev);
1901 return err;
1902 }
1903 }
1904
1895 phy_attached_info(phydev); 1905 phy_attached_info(phydev);
1896 1906
1897 return 0; 1907 return 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index e1e5ac053760..ce2ea2d491ac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -409,7 +409,7 @@ struct stmmac_desc_ops {
409 /* get timestamp value */ 409 /* get timestamp value */
410 u64(*get_timestamp) (void *desc, u32 ats); 410 u64(*get_timestamp) (void *desc, u32 ats);
411 /* get rx timestamp status */ 411 /* get rx timestamp status */
412 int (*get_rx_timestamp_status) (void *desc, u32 ats); 412 int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
413 /* Display ring */ 413 /* Display ring */
414 void (*display_ring)(void *head, unsigned int size, bool rx); 414 void (*display_ring)(void *head, unsigned int size, bool rx);
415 /* set MSS via context descriptor */ 415 /* set MSS via context descriptor */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 4b286e27c4ca..7e089bf906b4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -258,7 +258,8 @@ static int dwmac4_rx_check_timestamp(void *desc)
258 return ret; 258 return ret;
259} 259}
260 260
261static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats) 261static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc,
262 u32 ats)
262{ 263{
263 struct dma_desc *p = (struct dma_desc *)desc; 264 struct dma_desc *p = (struct dma_desc *)desc;
264 int ret = -EINVAL; 265 int ret = -EINVAL;
@@ -270,7 +271,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
270 271
271 /* Check if timestamp is OK from context descriptor */ 272 /* Check if timestamp is OK from context descriptor */
272 do { 273 do {
273 ret = dwmac4_rx_check_timestamp(desc); 274 ret = dwmac4_rx_check_timestamp(next_desc);
274 if (ret < 0) 275 if (ret < 0)
275 goto exit; 276 goto exit;
276 i++; 277 i++;
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 7546b3664113..2a828a312814 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -400,7 +400,8 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
400 return ns; 400 return ns;
401} 401}
402 402
403static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats) 403static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
404 u32 ats)
404{ 405{
405 if (ats) { 406 if (ats) {
406 struct dma_extended_desc *p = (struct dma_extended_desc *)desc; 407 struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index f817f8f36569..db4cee57bb24 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -265,7 +265,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
265 return ns; 265 return ns;
266} 266}
267 267
268static int ndesc_get_rx_timestamp_status(void *desc, u32 ats) 268static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
269{ 269{
270 struct dma_desc *p = (struct dma_desc *)desc; 270 struct dma_desc *p = (struct dma_desc *)desc;
271 271
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 721b61655261..08c19ebd5306 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -34,6 +34,7 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
34{ 34{
35 u32 value = readl(ioaddr + PTP_TCR); 35 u32 value = readl(ioaddr + PTP_TCR);
36 unsigned long data; 36 unsigned long data;
37 u32 reg_value;
37 38
38 /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second 39 /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second
39 * formula = (1/ptp_clock) * 1000000000 40 * formula = (1/ptp_clock) * 1000000000
@@ -50,10 +51,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
50 51
51 data &= PTP_SSIR_SSINC_MASK; 52 data &= PTP_SSIR_SSINC_MASK;
52 53
54 reg_value = data;
53 if (gmac4) 55 if (gmac4)
54 data = data << GMAC4_PTP_SSIR_SSINC_SHIFT; 56 reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT;
55 57
56 writel(data, ioaddr + PTP_SSIR); 58 writel(reg_value, ioaddr + PTP_SSIR);
57 59
58 return data; 60 return data;
59} 61}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d7250539d0bd..337d53d12e94 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -482,7 +482,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
482 desc = np; 482 desc = np;
483 483
484 /* Check if timestamp is available */ 484 /* Check if timestamp is available */
485 if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) { 485 if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) {
486 ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); 486 ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
487 netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns); 487 netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
488 shhwtstamp = skb_hwtstamps(skb); 488 shhwtstamp = skb_hwtstamps(skb);
diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
index 8483f03d5a41..1ab97d99b9ba 100644
--- a/drivers/net/hippi/rrunner.c
+++ b/drivers/net/hippi/rrunner.c
@@ -1379,8 +1379,8 @@ static int rr_close(struct net_device *dev)
1379 rrpriv->info_dma); 1379 rrpriv->info_dma);
1380 rrpriv->info = NULL; 1380 rrpriv->info = NULL;
1381 1381
1382 free_irq(pdev->irq, dev);
1383 spin_unlock_irqrestore(&rrpriv->lock, flags); 1382 spin_unlock_irqrestore(&rrpriv->lock, flags);
1383 free_irq(pdev->irq, dev);
1384 1384
1385 return 0; 1385 return 0;
1386} 1386}
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 5f93e6add563..e911e4990b20 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -239,14 +239,10 @@ static int at803x_resume(struct phy_device *phydev)
239{ 239{
240 int value; 240 int value;
241 241
242 mutex_lock(&phydev->lock);
243
244 value = phy_read(phydev, MII_BMCR); 242 value = phy_read(phydev, MII_BMCR);
245 value &= ~(BMCR_PDOWN | BMCR_ISOLATE); 243 value &= ~(BMCR_PDOWN | BMCR_ISOLATE);
246 phy_write(phydev, MII_BMCR, value); 244 phy_write(phydev, MII_BMCR, value);
247 245
248 mutex_unlock(&phydev->lock);
249
250 return 0; 246 return 0;
251} 247}
252 248
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 4d02b27df044..82104edca393 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -637,6 +637,10 @@ static int m88e1510_config_aneg(struct phy_device *phydev)
637 if (err < 0) 637 if (err < 0)
638 goto error; 638 goto error;
639 639
640 /* Do not touch the fiber page if we're in copper->sgmii mode */
641 if (phydev->interface == PHY_INTERFACE_MODE_SGMII)
642 return 0;
643
640 /* Then the fiber link */ 644 /* Then the fiber link */
641 err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); 645 err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE);
642 if (err < 0) 646 if (err < 0)
@@ -875,6 +879,8 @@ static int m88e1510_config_init(struct phy_device *phydev)
875 879
876 /* SGMII-to-Copper mode initialization */ 880 /* SGMII-to-Copper mode initialization */
877 if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { 881 if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
882 u32 pause;
883
878 /* Select page 18 */ 884 /* Select page 18 */
879 err = marvell_set_page(phydev, 18); 885 err = marvell_set_page(phydev, 18);
880 if (err < 0) 886 if (err < 0)
@@ -898,6 +904,16 @@ static int m88e1510_config_init(struct phy_device *phydev)
898 err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); 904 err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
899 if (err < 0) 905 if (err < 0)
900 return err; 906 return err;
907
908 /* There appears to be a bug in the 88e1512 when used in
909 * SGMII to copper mode, where the AN advertisment register
910 * clears the pause bits each time a negotiation occurs.
911 * This means we can never be truely sure what was advertised,
912 * so disable Pause support.
913 */
914 pause = SUPPORTED_Pause | SUPPORTED_Asym_Pause;
915 phydev->supported &= ~pause;
916 phydev->advertising &= ~pause;
901 } 917 }
902 918
903 return m88e1121_config_init(phydev); 919 return m88e1121_config_init(phydev);
@@ -2069,7 +2085,7 @@ static struct phy_driver marvell_drivers[] = {
2069 .flags = PHY_HAS_INTERRUPT, 2085 .flags = PHY_HAS_INTERRUPT,
2070 .probe = marvell_probe, 2086 .probe = marvell_probe,
2071 .config_init = &m88e1145_config_init, 2087 .config_init = &m88e1145_config_init,
2072 .config_aneg = &marvell_config_aneg, 2088 .config_aneg = &m88e1101_config_aneg,
2073 .read_status = &genphy_read_status, 2089 .read_status = &genphy_read_status,
2074 .ack_interrupt = &marvell_ack_interrupt, 2090 .ack_interrupt = &marvell_ack_interrupt,
2075 .config_intr = &marvell_config_intr, 2091 .config_intr = &marvell_config_intr,
diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c
index bfd3090fb055..07c6048200c6 100644
--- a/drivers/net/phy/mdio-xgene.c
+++ b/drivers/net/phy/mdio-xgene.c
@@ -194,8 +194,11 @@ static int xgene_mdio_reset(struct xgene_mdio_pdata *pdata)
194 } 194 }
195 195
196 ret = xgene_enet_ecc_init(pdata); 196 ret = xgene_enet_ecc_init(pdata);
197 if (ret) 197 if (ret) {
198 if (pdata->dev->of_node)
199 clk_disable_unprepare(pdata->clk);
198 return ret; 200 return ret;
201 }
199 xgene_gmac_reset(pdata); 202 xgene_gmac_reset(pdata);
200 203
201 return 0; 204 return 0;
@@ -388,8 +391,10 @@ static int xgene_mdio_probe(struct platform_device *pdev)
388 return ret; 391 return ret;
389 392
390 mdio_bus = mdiobus_alloc(); 393 mdio_bus = mdiobus_alloc();
391 if (!mdio_bus) 394 if (!mdio_bus) {
392 return -ENOMEM; 395 ret = -ENOMEM;
396 goto out_clk;
397 }
393 398
394 mdio_bus->name = "APM X-Gene MDIO bus"; 399 mdio_bus->name = "APM X-Gene MDIO bus";
395 400
@@ -418,7 +423,7 @@ static int xgene_mdio_probe(struct platform_device *pdev)
418 mdio_bus->phy_mask = ~0; 423 mdio_bus->phy_mask = ~0;
419 ret = mdiobus_register(mdio_bus); 424 ret = mdiobus_register(mdio_bus);
420 if (ret) 425 if (ret)
421 goto out; 426 goto out_mdiobus;
422 427
423 acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1, 428 acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1,
424 acpi_register_phy, NULL, mdio_bus, NULL); 429 acpi_register_phy, NULL, mdio_bus, NULL);
@@ -426,16 +431,20 @@ static int xgene_mdio_probe(struct platform_device *pdev)
426 } 431 }
427 432
428 if (ret) 433 if (ret)
429 goto out; 434 goto out_mdiobus;
430 435
431 pdata->mdio_bus = mdio_bus; 436 pdata->mdio_bus = mdio_bus;
432 xgene_mdio_status = true; 437 xgene_mdio_status = true;
433 438
434 return 0; 439 return 0;
435 440
436out: 441out_mdiobus:
437 mdiobus_free(mdio_bus); 442 mdiobus_free(mdio_bus);
438 443
444out_clk:
445 if (dev->of_node)
446 clk_disable_unprepare(pdata->clk);
447
439 return ret; 448 return ret;
440} 449}
441 450
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 2df7b62c1a36..54d00a1d2bef 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -270,6 +270,7 @@ static void of_mdiobus_link_mdiodev(struct mii_bus *bus,
270 270
271 if (addr == mdiodev->addr) { 271 if (addr == mdiodev->addr) {
272 dev->of_node = child; 272 dev->of_node = child;
273 dev->fwnode = of_fwnode_handle(child);
273 return; 274 return;
274 } 275 }
275 } 276 }
diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c
index 1ea69b7585d9..842eb871a6e3 100644
--- a/drivers/net/phy/meson-gxl.c
+++ b/drivers/net/phy/meson-gxl.c
@@ -22,6 +22,7 @@
22#include <linux/ethtool.h> 22#include <linux/ethtool.h>
23#include <linux/phy.h> 23#include <linux/phy.h>
24#include <linux/netdevice.h> 24#include <linux/netdevice.h>
25#include <linux/bitfield.h>
25 26
26static int meson_gxl_config_init(struct phy_device *phydev) 27static int meson_gxl_config_init(struct phy_device *phydev)
27{ 28{
@@ -50,6 +51,77 @@ static int meson_gxl_config_init(struct phy_device *phydev)
50 return 0; 51 return 0;
51} 52}
52 53
54/* This function is provided to cope with the possible failures of this phy
55 * during aneg process. When aneg fails, the PHY reports that aneg is done
56 * but the value found in MII_LPA is wrong:
57 * - Early failures: MII_LPA is just 0x0001. if MII_EXPANSION reports that
58 * the link partner (LP) supports aneg but the LP never acked our base
59 * code word, it is likely that we never sent it to begin with.
60 * - Late failures: MII_LPA is filled with a value which seems to make sense
61 * but it actually is not what the LP is advertising. It seems that we
62 * can detect this using a magic bit in the WOL bank (reg 12 - bit 12).
63 * If this particular bit is not set when aneg is reported being done,
64 * it means MII_LPA is likely to be wrong.
65 *
66 * In both case, forcing a restart of the aneg process solve the problem.
67 * When this failure happens, the first retry is usually successful but,
68 * in some cases, it may take up to 6 retries to get a decent result
69 */
70static int meson_gxl_read_status(struct phy_device *phydev)
71{
72 int ret, wol, lpa, exp;
73
74 if (phydev->autoneg == AUTONEG_ENABLE) {
75 ret = genphy_aneg_done(phydev);
76 if (ret < 0)
77 return ret;
78 else if (!ret)
79 goto read_status_continue;
80
81 /* Need to access WOL bank, make sure the access is open */
82 ret = phy_write(phydev, 0x14, 0x0000);
83 if (ret)
84 return ret;
85 ret = phy_write(phydev, 0x14, 0x0400);
86 if (ret)
87 return ret;
88 ret = phy_write(phydev, 0x14, 0x0000);
89 if (ret)
90 return ret;
91 ret = phy_write(phydev, 0x14, 0x0400);
92 if (ret)
93 return ret;
94
95 /* Request LPI_STATUS WOL register */
96 ret = phy_write(phydev, 0x14, 0x8D80);
97 if (ret)
98 return ret;
99
100 /* Read LPI_STATUS value */
101 wol = phy_read(phydev, 0x15);
102 if (wol < 0)
103 return wol;
104
105 lpa = phy_read(phydev, MII_LPA);
106 if (lpa < 0)
107 return lpa;
108
109 exp = phy_read(phydev, MII_EXPANSION);
110 if (exp < 0)
111 return exp;
112
113 if (!(wol & BIT(12)) ||
114 ((exp & EXPANSION_NWAY) && !(lpa & LPA_LPACK))) {
115 /* Looks like aneg failed after all */
116 phydev_dbg(phydev, "LPA corruption - aneg restart\n");
117 return genphy_restart_aneg(phydev);
118 }
119 }
120
121read_status_continue:
122 return genphy_read_status(phydev);
123}
124
53static struct phy_driver meson_gxl_phy[] = { 125static struct phy_driver meson_gxl_phy[] = {
54 { 126 {
55 .phy_id = 0x01814400, 127 .phy_id = 0x01814400,
@@ -60,7 +132,7 @@ static struct phy_driver meson_gxl_phy[] = {
60 .config_init = meson_gxl_config_init, 132 .config_init = meson_gxl_config_init,
61 .config_aneg = genphy_config_aneg, 133 .config_aneg = genphy_config_aneg,
62 .aneg_done = genphy_aneg_done, 134 .aneg_done = genphy_aneg_done,
63 .read_status = genphy_read_status, 135 .read_status = meson_gxl_read_status,
64 .suspend = genphy_suspend, 136 .suspend = genphy_suspend,
65 .resume = genphy_resume, 137 .resume = genphy_resume,
66 }, 138 },
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index ab4614113403..422ff6333c52 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -624,6 +624,7 @@ static int ksz9031_read_status(struct phy_device *phydev)
624 phydev->link = 0; 624 phydev->link = 0;
625 if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) 625 if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
626 phydev->drv->config_intr(phydev); 626 phydev->drv->config_intr(phydev);
627 return genphy_config_aneg(phydev);
627 } 628 }
628 629
629 return 0; 630 return 0;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 2b1e67bc1e73..ed10d1fc8f59 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -828,7 +828,6 @@ EXPORT_SYMBOL(phy_stop);
828 */ 828 */
829void phy_start(struct phy_device *phydev) 829void phy_start(struct phy_device *phydev)
830{ 830{
831 bool do_resume = false;
832 int err = 0; 831 int err = 0;
833 832
834 mutex_lock(&phydev->lock); 833 mutex_lock(&phydev->lock);
@@ -841,6 +840,9 @@ void phy_start(struct phy_device *phydev)
841 phydev->state = PHY_UP; 840 phydev->state = PHY_UP;
842 break; 841 break;
843 case PHY_HALTED: 842 case PHY_HALTED:
843 /* if phy was suspended, bring the physical link up again */
844 phy_resume(phydev);
845
844 /* make sure interrupts are re-enabled for the PHY */ 846 /* make sure interrupts are re-enabled for the PHY */
845 if (phydev->irq != PHY_POLL) { 847 if (phydev->irq != PHY_POLL) {
846 err = phy_enable_interrupts(phydev); 848 err = phy_enable_interrupts(phydev);
@@ -849,17 +851,12 @@ void phy_start(struct phy_device *phydev)
849 } 851 }
850 852
851 phydev->state = PHY_RESUMING; 853 phydev->state = PHY_RESUMING;
852 do_resume = true;
853 break; 854 break;
854 default: 855 default:
855 break; 856 break;
856 } 857 }
857 mutex_unlock(&phydev->lock); 858 mutex_unlock(&phydev->lock);
858 859
859 /* if phy was suspended, bring the physical link up again */
860 if (do_resume)
861 phy_resume(phydev);
862
863 phy_trigger_machine(phydev, true); 860 phy_trigger_machine(phydev, true);
864} 861}
865EXPORT_SYMBOL(phy_start); 862EXPORT_SYMBOL(phy_start);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 67f25ac29025..b15b31ca2618 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -135,7 +135,9 @@ static int mdio_bus_phy_resume(struct device *dev)
135 if (!mdio_bus_phy_may_suspend(phydev)) 135 if (!mdio_bus_phy_may_suspend(phydev))
136 goto no_resume; 136 goto no_resume;
137 137
138 mutex_lock(&phydev->lock);
138 ret = phy_resume(phydev); 139 ret = phy_resume(phydev);
140 mutex_unlock(&phydev->lock);
139 if (ret < 0) 141 if (ret < 0)
140 return ret; 142 return ret;
141 143
@@ -1026,7 +1028,9 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
1026 if (err) 1028 if (err)
1027 goto error; 1029 goto error;
1028 1030
1031 mutex_lock(&phydev->lock);
1029 phy_resume(phydev); 1032 phy_resume(phydev);
1033 mutex_unlock(&phydev->lock);
1030 phy_led_triggers_register(phydev); 1034 phy_led_triggers_register(phydev);
1031 1035
1032 return err; 1036 return err;
@@ -1157,6 +1161,8 @@ int phy_resume(struct phy_device *phydev)
1157 struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver); 1161 struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
1158 int ret = 0; 1162 int ret = 0;
1159 1163
1164 WARN_ON(!mutex_is_locked(&phydev->lock));
1165
1160 if (phydev->drv && phydrv->resume) 1166 if (phydev->drv && phydrv->resume)
1161 ret = phydrv->resume(phydev); 1167 ret = phydrv->resume(phydev);
1162 1168
@@ -1639,13 +1645,9 @@ int genphy_resume(struct phy_device *phydev)
1639{ 1645{
1640 int value; 1646 int value;
1641 1647
1642 mutex_lock(&phydev->lock);
1643
1644 value = phy_read(phydev, MII_BMCR); 1648 value = phy_read(phydev, MII_BMCR);
1645 phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN); 1649 phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
1646 1650
1647 mutex_unlock(&phydev->lock);
1648
1649 return 0; 1651 return 0;
1650} 1652}
1651EXPORT_SYMBOL(genphy_resume); 1653EXPORT_SYMBOL(genphy_resume);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 5dc9668dde34..827f3f92560e 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -526,6 +526,7 @@ struct phylink *phylink_create(struct net_device *ndev, struct device_node *np,
526 pl->link_config.pause = MLO_PAUSE_AN; 526 pl->link_config.pause = MLO_PAUSE_AN;
527 pl->link_config.speed = SPEED_UNKNOWN; 527 pl->link_config.speed = SPEED_UNKNOWN;
528 pl->link_config.duplex = DUPLEX_UNKNOWN; 528 pl->link_config.duplex = DUPLEX_UNKNOWN;
529 pl->link_config.an_enabled = true;
529 pl->ops = ops; 530 pl->ops = ops;
530 __set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); 531 __set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
531 532
@@ -951,6 +952,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
951 mutex_lock(&pl->state_mutex); 952 mutex_lock(&pl->state_mutex);
952 /* Configure the MAC to match the new settings */ 953 /* Configure the MAC to match the new settings */
953 linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising); 954 linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising);
955 pl->link_config.interface = config.interface;
954 pl->link_config.speed = our_kset.base.speed; 956 pl->link_config.speed = our_kset.base.speed;
955 pl->link_config.duplex = our_kset.base.duplex; 957 pl->link_config.duplex = our_kset.base.duplex;
956 pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE; 958 pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE;
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 304ec6555cd8..3000ddd1c7e2 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1204,12 +1204,14 @@ static const struct usb_device_id products[] = {
1204 {QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */ 1204 {QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */
1205 {QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */ 1205 {QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */
1206 {QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */ 1206 {QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */
1207 {QMI_FIXED_INTF(0x1199, 0x9091, 8)}, /* Sierra Wireless EM7565 */
1207 {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ 1208 {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
1208 {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ 1209 {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */
1209 {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ 1210 {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */
1210 {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ 1211 {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */
1211 {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ 1212 {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */
1212 {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ 1213 {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */
1214 {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */
1213 {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ 1215 {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */
1214 {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */ 1216 {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */
1215 {QMI_FIXED_INTF(0x1c9e, 0x9801, 3)}, /* Telewell TW-3G HSPA+ */ 1217 {QMI_FIXED_INTF(0x1c9e, 0x9801, 3)}, /* Telewell TW-3G HSPA+ */
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 19b9cc51079e..31f4b7911ef8 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2155,6 +2155,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
2155 } 2155 }
2156 2156
2157 ndst = &rt->dst; 2157 ndst = &rt->dst;
2158 if (skb_dst(skb)) {
2159 int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
2160
2161 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
2162 skb, mtu);
2163 }
2164
2158 tos = ip_tunnel_ecn_encap(tos, old_iph, skb); 2165 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
2159 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); 2166 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
2160 err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr), 2167 err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
@@ -2190,6 +2197,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
2190 goto out_unlock; 2197 goto out_unlock;
2191 } 2198 }
2192 2199
2200 if (skb_dst(skb)) {
2201 int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
2202
2203 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
2204 skb, mtu);
2205 }
2206
2193 tos = ip_tunnel_ecn_encap(tos, old_iph, skb); 2207 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
2194 ttl = ttl ? : ip6_dst_hoplimit(ndst); 2208 ttl = ttl ? : ip6_dst_hoplimit(ndst);
2195 skb_scrub_packet(skb, xnet); 2209 skb_scrub_packet(skb, xnet);
@@ -3103,6 +3117,11 @@ static void vxlan_config_apply(struct net_device *dev,
3103 3117
3104 max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : 3118 max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
3105 VXLAN_HEADROOM); 3119 VXLAN_HEADROOM);
3120 if (max_mtu < ETH_MIN_MTU)
3121 max_mtu = ETH_MIN_MTU;
3122
3123 if (!changelink && !conf->mtu)
3124 dev->mtu = max_mtu;
3106 } 3125 }
3107 3126
3108 if (dev->mtu > max_mtu) 3127 if (dev->mtu > max_mtu)
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 10b075a46b26..e8189c07b41f 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -684,6 +684,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
684 hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN); 684 hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN);
685 hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | 685 hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
686 IEEE80211_STYPE_NULLFUNC | 686 IEEE80211_STYPE_NULLFUNC |
687 IEEE80211_FCTL_TODS |
687 (ps ? IEEE80211_FCTL_PM : 0)); 688 (ps ? IEEE80211_FCTL_PM : 0));
688 hdr->duration_id = cpu_to_le16(0); 689 hdr->duration_id = cpu_to_le16(0);
689 memcpy(hdr->addr1, vp->bssid, ETH_ALEN); 690 memcpy(hdr->addr1, vp->bssid, ETH_ALEN);
@@ -3215,7 +3216,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info)
3215 if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info))) 3216 if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info)))
3216 continue; 3217 continue;
3217 3218
3218 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 3219 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
3219 if (!skb) { 3220 if (!skb) {
3220 res = -ENOMEM; 3221 res = -ENOMEM;
3221 goto out_err; 3222 goto out_err;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index e949e3302af4..c586bcdb5190 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
211 return ret; 211 return ret;
212} 212}
213 213
214static int btt_log_read_pair(struct arena_info *arena, u32 lane, 214static int btt_log_group_read(struct arena_info *arena, u32 lane,
215 struct log_entry *ent) 215 struct log_group *log)
216{ 216{
217 return arena_read_bytes(arena, 217 return arena_read_bytes(arena,
218 arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, 218 arena->logoff + (lane * LOG_GRP_SIZE), log,
219 2 * LOG_ENT_SIZE, 0); 219 LOG_GRP_SIZE, 0);
220} 220}
221 221
222static struct dentry *debugfs_root; 222static struct dentry *debugfs_root;
@@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
256 debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); 256 debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
257 debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); 257 debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
258 debugfs_create_x32("flags", S_IRUGO, d, &a->flags); 258 debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
259 debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
260 debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
259} 261}
260 262
261static void btt_debugfs_init(struct btt *btt) 263static void btt_debugfs_init(struct btt *btt)
@@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt)
274 } 276 }
275} 277}
276 278
279static u32 log_seq(struct log_group *log, int log_idx)
280{
281 return le32_to_cpu(log->ent[log_idx].seq);
282}
283
277/* 284/*
278 * This function accepts two log entries, and uses the 285 * This function accepts two log entries, and uses the
279 * sequence number to find the 'older' entry. 286 * sequence number to find the 'older' entry.
@@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt)
283 * 290 *
284 * TODO The logic feels a bit kludge-y. make it better.. 291 * TODO The logic feels a bit kludge-y. make it better..
285 */ 292 */
286static int btt_log_get_old(struct log_entry *ent) 293static int btt_log_get_old(struct arena_info *a, struct log_group *log)
287{ 294{
295 int idx0 = a->log_index[0];
296 int idx1 = a->log_index[1];
288 int old; 297 int old;
289 298
290 /* 299 /*
@@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent)
292 * the next time, the following logic works out to put this 301 * the next time, the following logic works out to put this
293 * (next) entry into [1] 302 * (next) entry into [1]
294 */ 303 */
295 if (ent[0].seq == 0) { 304 if (log_seq(log, idx0) == 0) {
296 ent[0].seq = cpu_to_le32(1); 305 log->ent[idx0].seq = cpu_to_le32(1);
297 return 0; 306 return 0;
298 } 307 }
299 308
300 if (ent[0].seq == ent[1].seq) 309 if (log_seq(log, idx0) == log_seq(log, idx1))
301 return -EINVAL; 310 return -EINVAL;
302 if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) 311 if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
303 return -EINVAL; 312 return -EINVAL;
304 313
305 if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { 314 if (log_seq(log, idx0) < log_seq(log, idx1)) {
306 if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) 315 if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
307 old = 0; 316 old = 0;
308 else 317 else
309 old = 1; 318 old = 1;
310 } else { 319 } else {
311 if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) 320 if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
312 old = 1; 321 old = 1;
313 else 322 else
314 old = 0; 323 old = 0;
@@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
328{ 337{
329 int ret; 338 int ret;
330 int old_ent, ret_ent; 339 int old_ent, ret_ent;
331 struct log_entry log[2]; 340 struct log_group log;
332 341
333 ret = btt_log_read_pair(arena, lane, log); 342 ret = btt_log_group_read(arena, lane, &log);
334 if (ret) 343 if (ret)
335 return -EIO; 344 return -EIO;
336 345
337 old_ent = btt_log_get_old(log); 346 old_ent = btt_log_get_old(arena, &log);
338 if (old_ent < 0 || old_ent > 1) { 347 if (old_ent < 0 || old_ent > 1) {
339 dev_err(to_dev(arena), 348 dev_err(to_dev(arena),
340 "log corruption (%d): lane %d seq [%d, %d]\n", 349 "log corruption (%d): lane %d seq [%d, %d]\n",
341 old_ent, lane, log[0].seq, log[1].seq); 350 old_ent, lane, log.ent[arena->log_index[0]].seq,
351 log.ent[arena->log_index[1]].seq);
342 /* TODO set error state? */ 352 /* TODO set error state? */
343 return -EIO; 353 return -EIO;
344 } 354 }
@@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
346 ret_ent = (old_flag ? old_ent : (1 - old_ent)); 356 ret_ent = (old_flag ? old_ent : (1 - old_ent));
347 357
348 if (ent != NULL) 358 if (ent != NULL)
349 memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); 359 memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
350 360
351 return ret_ent; 361 return ret_ent;
352} 362}
@@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
360 u32 sub, struct log_entry *ent, unsigned long flags) 370 u32 sub, struct log_entry *ent, unsigned long flags)
361{ 371{
362 int ret; 372 int ret;
363 /* 373 u32 group_slot = arena->log_index[sub];
364 * Ignore the padding in log_entry for calculating log_half. 374 unsigned int log_half = LOG_ENT_SIZE / 2;
365 * The entry is 'committed' when we write the sequence number,
366 * and we want to ensure that that is the last thing written.
367 * We don't bother writing the padding as that would be extra
368 * media wear and write amplification
369 */
370 unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
371 u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
372 void *src = ent; 375 void *src = ent;
376 u64 ns_off;
373 377
378 ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
379 (group_slot * LOG_ENT_SIZE);
374 /* split the 16B write into atomic, durable halves */ 380 /* split the 16B write into atomic, durable halves */
375 ret = arena_write_bytes(arena, ns_off, src, log_half, flags); 381 ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
376 if (ret) 382 if (ret)
@@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena)
453{ 459{
454 size_t logsize = arena->info2off - arena->logoff; 460 size_t logsize = arena->info2off - arena->logoff;
455 size_t chunk_size = SZ_4K, offset = 0; 461 size_t chunk_size = SZ_4K, offset = 0;
456 struct log_entry log; 462 struct log_entry ent;
457 void *zerobuf; 463 void *zerobuf;
458 int ret; 464 int ret;
459 u32 i; 465 u32 i;
@@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena)
485 } 491 }
486 492
487 for (i = 0; i < arena->nfree; i++) { 493 for (i = 0; i < arena->nfree; i++) {
488 log.lba = cpu_to_le32(i); 494 ent.lba = cpu_to_le32(i);
489 log.old_map = cpu_to_le32(arena->external_nlba + i); 495 ent.old_map = cpu_to_le32(arena->external_nlba + i);
490 log.new_map = cpu_to_le32(arena->external_nlba + i); 496 ent.new_map = cpu_to_le32(arena->external_nlba + i);
491 log.seq = cpu_to_le32(LOG_SEQ_INIT); 497 ent.seq = cpu_to_le32(LOG_SEQ_INIT);
492 ret = __btt_log_write(arena, i, 0, &log, 0); 498 ret = __btt_log_write(arena, i, 0, &ent, 0);
493 if (ret) 499 if (ret)
494 goto free; 500 goto free;
495 } 501 }
@@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena)
594 return 0; 600 return 0;
595} 601}
596 602
603static bool ent_is_padding(struct log_entry *ent)
604{
605 return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
606 && (ent->seq == 0);
607}
608
609/*
610 * Detecting valid log indices: We read a log group (see the comments in btt.h
611 * for a description of a 'log_group' and its 'slots'), and iterate over its
612 * four slots. We expect that a padding slot will be all-zeroes, and use this
613 * to detect a padding slot vs. an actual entry.
614 *
615 * If a log_group is in the initial state, i.e. hasn't been used since the
616 * creation of this BTT layout, it will have three of the four slots with
617 * zeroes. We skip over these log_groups for the detection of log_index. If
618 * all log_groups are in the initial state (i.e. the BTT has never been
619 * written to), it is safe to assume the 'new format' of log entries in slots
620 * (0, 1).
621 */
622static int log_set_indices(struct arena_info *arena)
623{
624 bool idx_set = false, initial_state = true;
625 int ret, log_index[2] = {-1, -1};
626 u32 i, j, next_idx = 0;
627 struct log_group log;
628 u32 pad_count = 0;
629
630 for (i = 0; i < arena->nfree; i++) {
631 ret = btt_log_group_read(arena, i, &log);
632 if (ret < 0)
633 return ret;
634
635 for (j = 0; j < 4; j++) {
636 if (!idx_set) {
637 if (ent_is_padding(&log.ent[j])) {
638 pad_count++;
639 continue;
640 } else {
641 /* Skip if index has been recorded */
642 if ((next_idx == 1) &&
643 (j == log_index[0]))
644 continue;
645 /* valid entry, record index */
646 log_index[next_idx] = j;
647 next_idx++;
648 }
649 if (next_idx == 2) {
650 /* two valid entries found */
651 idx_set = true;
652 } else if (next_idx > 2) {
653 /* too many valid indices */
654 return -ENXIO;
655 }
656 } else {
657 /*
658 * once the indices have been set, just verify
659 * that all subsequent log groups are either in
660 * their initial state or follow the same
661 * indices.
662 */
663 if (j == log_index[0]) {
664 /* entry must be 'valid' */
665 if (ent_is_padding(&log.ent[j]))
666 return -ENXIO;
667 } else if (j == log_index[1]) {
668 ;
669 /*
670 * log_index[1] can be padding if the
671 * lane never got used and it is still
672 * in the initial state (three 'padding'
673 * entries)
674 */
675 } else {
676 /* entry must be invalid (padding) */
677 if (!ent_is_padding(&log.ent[j]))
678 return -ENXIO;
679 }
680 }
681 }
682 /*
683 * If any of the log_groups have more than one valid,
684 * non-padding entry, then the we are no longer in the
685 * initial_state
686 */
687 if (pad_count < 3)
688 initial_state = false;
689 pad_count = 0;
690 }
691
692 if (!initial_state && !idx_set)
693 return -ENXIO;
694
695 /*
696 * If all the entries in the log were in the initial state,
697 * assume new padding scheme
698 */
699 if (initial_state)
700 log_index[1] = 1;
701
702 /*
703 * Only allow the known permutations of log/padding indices,
704 * i.e. (0, 1), and (0, 2)
705 */
706 if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
707 ; /* known index possibilities */
708 else {
709 dev_err(to_dev(arena), "Found an unknown padding scheme\n");
710 return -ENXIO;
711 }
712
713 arena->log_index[0] = log_index[0];
714 arena->log_index[1] = log_index[1];
715 dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
716 dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
717 return 0;
718}
719
597static int btt_rtt_init(struct arena_info *arena) 720static int btt_rtt_init(struct arena_info *arena)
598{ 721{
599 arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); 722 arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
@@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
650 available -= 2 * BTT_PG_SIZE; 773 available -= 2 * BTT_PG_SIZE;
651 774
652 /* The log takes a fixed amount of space based on nfree */ 775 /* The log takes a fixed amount of space based on nfree */
653 logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), 776 logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
654 BTT_PG_SIZE);
655 available -= logsize; 777 available -= logsize;
656 778
657 /* Calculate optimal split between map and data area */ 779 /* Calculate optimal split between map and data area */
@@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
668 arena->mapoff = arena->dataoff + datasize; 790 arena->mapoff = arena->dataoff + datasize;
669 arena->logoff = arena->mapoff + mapsize; 791 arena->logoff = arena->mapoff + mapsize;
670 arena->info2off = arena->logoff + logsize; 792 arena->info2off = arena->logoff + logsize;
793
794 /* Default log indices are (0,1) */
795 arena->log_index[0] = 0;
796 arena->log_index[1] = 1;
671 return arena; 797 return arena;
672} 798}
673 799
@@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt)
758 arena->external_lba_start = cur_nlba; 884 arena->external_lba_start = cur_nlba;
759 parse_arena_meta(arena, super, cur_off); 885 parse_arena_meta(arena, super, cur_off);
760 886
887 ret = log_set_indices(arena);
888 if (ret) {
889 dev_err(to_dev(arena),
890 "Unable to deduce log/padding indices\n");
891 goto out;
892 }
893
761 mutex_init(&arena->err_lock); 894 mutex_init(&arena->err_lock);
762 ret = btt_freelist_init(arena); 895 ret = btt_freelist_init(arena);
763 if (ret) 896 if (ret)
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index 578c2057524d..db3cb6d4d0d4 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -27,6 +27,7 @@
27#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) 27#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
28#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) 28#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
29#define MAP_ENT_NORMAL 0xC0000000 29#define MAP_ENT_NORMAL 0xC0000000
30#define LOG_GRP_SIZE sizeof(struct log_group)
30#define LOG_ENT_SIZE sizeof(struct log_entry) 31#define LOG_ENT_SIZE sizeof(struct log_entry)
31#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ 32#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */
32#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ 33#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */
@@ -50,12 +51,52 @@ enum btt_init_state {
50 INIT_READY 51 INIT_READY
51}; 52};
52 53
54/*
55 * A log group represents one log 'lane', and consists of four log entries.
56 * Two of the four entries are valid entries, and the remaining two are
57 * padding. Due to an old bug in the padding location, we need to perform a
58 * test to determine the padding scheme being used, and use that scheme
59 * thereafter.
60 *
61 * In kernels prior to 4.15, 'log group' would have actual log entries at
62 * indices (0, 2) and padding at indices (1, 3), where as the correct/updated
63 * format has log entries at indices (0, 1) and padding at indices (2, 3).
64 *
65 * Old (pre 4.15) format:
66 * +-----------------+-----------------+
67 * | ent[0] | ent[1] |
68 * | 16B | 16B |
69 * | lba/old/new/seq | pad |
70 * +-----------------------------------+
71 * | ent[2] | ent[3] |
72 * | 16B | 16B |
73 * | lba/old/new/seq | pad |
74 * +-----------------+-----------------+
75 *
76 * New format:
77 * +-----------------+-----------------+
78 * | ent[0] | ent[1] |
79 * | 16B | 16B |
80 * | lba/old/new/seq | lba/old/new/seq |
81 * +-----------------------------------+
82 * | ent[2] | ent[3] |
83 * | 16B | 16B |
84 * | pad | pad |
85 * +-----------------+-----------------+
86 *
87 * We detect during start-up which format is in use, and set
88 * arena->log_index[(0, 1)] with the detected format.
89 */
90
53struct log_entry { 91struct log_entry {
54 __le32 lba; 92 __le32 lba;
55 __le32 old_map; 93 __le32 old_map;
56 __le32 new_map; 94 __le32 new_map;
57 __le32 seq; 95 __le32 seq;
58 __le64 padding[2]; 96};
97
98struct log_group {
99 struct log_entry ent[4];
59}; 100};
60 101
61struct btt_sb { 102struct btt_sb {
@@ -125,6 +166,8 @@ struct aligned_lock {
125 * @list: List head for list of arenas 166 * @list: List head for list of arenas
126 * @debugfs_dir: Debugfs dentry 167 * @debugfs_dir: Debugfs dentry
127 * @flags: Arena flags - may signify error states. 168 * @flags: Arena flags - may signify error states.
169 * @err_lock: Mutex for synchronizing error clearing.
170 * @log_index: Indices of the valid log entries in a log_group
128 * 171 *
129 * arena_info is a per-arena handle. Once an arena is narrowed down for an 172 * arena_info is a per-arena handle. Once an arena is narrowed down for an
130 * IO, this struct is passed around for the duration of the IO. 173 * IO, this struct is passed around for the duration of the IO.
@@ -157,6 +200,7 @@ struct arena_info {
157 /* Arena flags */ 200 /* Arena flags */
158 u32 flags; 201 u32 flags;
159 struct mutex err_lock; 202 struct mutex err_lock;
203 int log_index[2];
160}; 204};
161 205
162/** 206/**
@@ -176,6 +220,7 @@ struct arena_info {
176 * @init_lock: Mutex used for the BTT initialization 220 * @init_lock: Mutex used for the BTT initialization
177 * @init_state: Flag describing the initialization state for the BTT 221 * @init_state: Flag describing the initialization state for the BTT
178 * @num_arenas: Number of arenas in the BTT instance 222 * @num_arenas: Number of arenas in the BTT instance
223 * @phys_bb: Pointer to the namespace's badblocks structure
179 */ 224 */
180struct btt { 225struct btt {
181 struct gendisk *btt_disk; 226 struct gendisk *btt_disk;
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 65cc171c721d..2adada1a5855 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
364int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) 364int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
365{ 365{
366 u64 checksum, offset; 366 u64 checksum, offset;
367 unsigned long align;
368 enum nd_pfn_mode mode; 367 enum nd_pfn_mode mode;
369 struct nd_namespace_io *nsio; 368 struct nd_namespace_io *nsio;
369 unsigned long align, start_pad;
370 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 370 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
371 struct nd_namespace_common *ndns = nd_pfn->ndns; 371 struct nd_namespace_common *ndns = nd_pfn->ndns;
372 const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); 372 const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
@@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
410 410
411 align = le32_to_cpu(pfn_sb->align); 411 align = le32_to_cpu(pfn_sb->align);
412 offset = le64_to_cpu(pfn_sb->dataoff); 412 offset = le64_to_cpu(pfn_sb->dataoff);
413 start_pad = le32_to_cpu(pfn_sb->start_pad);
413 if (align == 0) 414 if (align == 0)
414 align = 1UL << ilog2(offset); 415 align = 1UL << ilog2(offset);
415 mode = le32_to_cpu(pfn_sb->mode); 416 mode = le32_to_cpu(pfn_sb->mode);
@@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
468 return -EBUSY; 469 return -EBUSY;
469 } 470 }
470 471
471 if ((align && !IS_ALIGNED(offset, align)) 472 if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
472 || !IS_ALIGNED(offset, PAGE_SIZE)) { 473 || !IS_ALIGNED(offset, PAGE_SIZE)) {
473 dev_err(&nd_pfn->dev, 474 dev_err(&nd_pfn->dev,
474 "bad offset: %#llx dax disabled align: %#lx\n", 475 "bad offset: %#llx dax disabled align: %#lx\n",
@@ -582,6 +583,12 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
582 return altmap; 583 return altmap;
583} 584}
584 585
586static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
587{
588 return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
589 ALIGN_DOWN(phys, nd_pfn->align));
590}
591
585static int nd_pfn_init(struct nd_pfn *nd_pfn) 592static int nd_pfn_init(struct nd_pfn *nd_pfn)
586{ 593{
587 u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; 594 u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
@@ -637,13 +644,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
637 start = nsio->res.start; 644 start = nsio->res.start;
638 size = PHYS_SECTION_ALIGN_UP(start + size) - start; 645 size = PHYS_SECTION_ALIGN_UP(start + size) - start;
639 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, 646 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
640 IORES_DESC_NONE) == REGION_MIXED) { 647 IORES_DESC_NONE) == REGION_MIXED
648 || !IS_ALIGNED(start + resource_size(&nsio->res),
649 nd_pfn->align)) {
641 size = resource_size(&nsio->res); 650 size = resource_size(&nsio->res);
642 end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size); 651 end_trunc = start + size - phys_pmem_align_down(nd_pfn,
652 start + size);
643 } 653 }
644 654
645 if (start_pad + end_trunc) 655 if (start_pad + end_trunc)
646 dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n", 656 dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
647 dev_name(&ndns->dev), start_pad + end_trunc); 657 dev_name(&ndns->dev), start_pad + end_trunc);
648 658
649 /* 659 /*
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f837d666cbd4..1e46e60b8f10 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1287,7 +1287,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
1287 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < 1287 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
1288 NVME_DSM_MAX_RANGES); 1288 NVME_DSM_MAX_RANGES);
1289 1289
1290 queue->limits.discard_alignment = size; 1290 queue->limits.discard_alignment = 0;
1291 queue->limits.discard_granularity = size; 1291 queue->limits.discard_granularity = size;
1292 1292
1293 blk_queue_max_discard_sectors(queue, UINT_MAX); 1293 blk_queue_max_discard_sectors(queue, UINT_MAX);
@@ -1705,7 +1705,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
1705 blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); 1705 blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
1706 blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); 1706 blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
1707 } 1707 }
1708 if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) 1708 if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
1709 is_power_of_2(ctrl->max_hw_sectors))
1709 blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); 1710 blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
1710 blk_queue_virt_boundary(q, ctrl->page_size - 1); 1711 blk_queue_virt_boundary(q, ctrl->page_size - 1);
1711 if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) 1712 if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
@@ -2869,7 +2870,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2869 2870
2870 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); 2871 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
2871 nvme_set_queue_limits(ctrl, ns->queue); 2872 nvme_set_queue_limits(ctrl, ns->queue);
2872 nvme_setup_streams_ns(ctrl, ns);
2873 2873
2874 id = nvme_identify_ns(ctrl, nsid); 2874 id = nvme_identify_ns(ctrl, nsid);
2875 if (!id) 2875 if (!id)
@@ -2880,6 +2880,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2880 2880
2881 if (nvme_init_ns_head(ns, nsid, id, &new)) 2881 if (nvme_init_ns_head(ns, nsid, id, &new))
2882 goto out_free_id; 2882 goto out_free_id;
2883 nvme_setup_streams_ns(ctrl, ns);
2883 2884
2884#ifdef CONFIG_NVME_MULTIPATH 2885#ifdef CONFIG_NVME_MULTIPATH
2885 /* 2886 /*
@@ -2965,8 +2966,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
2965 return; 2966 return;
2966 2967
2967 if (ns->disk && ns->disk->flags & GENHD_FL_UP) { 2968 if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
2968 if (blk_get_integrity(ns->disk))
2969 blk_integrity_unregister(ns->disk);
2970 nvme_mpath_remove_disk_links(ns); 2969 nvme_mpath_remove_disk_links(ns);
2971 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, 2970 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
2972 &nvme_ns_id_attr_group); 2971 &nvme_ns_id_attr_group);
@@ -2974,6 +2973,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
2974 nvme_nvm_unregister_sysfs(ns); 2973 nvme_nvm_unregister_sysfs(ns);
2975 del_gendisk(ns->disk); 2974 del_gendisk(ns->disk);
2976 blk_cleanup_queue(ns->queue); 2975 blk_cleanup_queue(ns->queue);
2976 if (blk_get_integrity(ns->disk))
2977 blk_integrity_unregister(ns->disk);
2977 } 2978 }
2978 2979
2979 mutex_lock(&ns->ctrl->subsys->lock); 2980 mutex_lock(&ns->ctrl->subsys->lock);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0a8af4daef89..794e66e4aa20 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3221,7 +3221,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
3221 3221
3222 /* initiate nvme ctrl ref counting teardown */ 3222 /* initiate nvme ctrl ref counting teardown */
3223 nvme_uninit_ctrl(&ctrl->ctrl); 3223 nvme_uninit_ctrl(&ctrl->ctrl);
3224 nvme_put_ctrl(&ctrl->ctrl);
3225 3224
3226 /* Remove core ctrl ref. */ 3225 /* Remove core ctrl ref. */
3227 nvme_put_ctrl(&ctrl->ctrl); 3226 nvme_put_ctrl(&ctrl->ctrl);
diff --git a/drivers/nvmem/meson-mx-efuse.c b/drivers/nvmem/meson-mx-efuse.c
index a346b4923550..41d3a3c1104e 100644
--- a/drivers/nvmem/meson-mx-efuse.c
+++ b/drivers/nvmem/meson-mx-efuse.c
@@ -156,8 +156,8 @@ static int meson_mx_efuse_read(void *context, unsigned int offset,
156 MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE, 156 MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE,
157 MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE); 157 MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE);
158 158
159 for (i = offset; i < offset + bytes; i += efuse->config.word_size) { 159 for (i = 0; i < bytes; i += efuse->config.word_size) {
160 addr = i / efuse->config.word_size; 160 addr = (offset + i) / efuse->config.word_size;
161 161
162 err = meson_mx_efuse_read_addr(efuse, addr, &tmp); 162 err = meson_mx_efuse_read_addr(efuse, addr, &tmp);
163 if (err) 163 if (err)
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 98258583abb0..3481e69738b5 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -81,6 +81,7 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio,
81 * can be looked up later */ 81 * can be looked up later */
82 of_node_get(child); 82 of_node_get(child);
83 phy->mdio.dev.of_node = child; 83 phy->mdio.dev.of_node = child;
84 phy->mdio.dev.fwnode = of_fwnode_handle(child);
84 85
85 /* All data is now stored in the phy struct; 86 /* All data is now stored in the phy struct;
86 * register it */ 87 * register it */
@@ -111,6 +112,7 @@ static int of_mdiobus_register_device(struct mii_bus *mdio,
111 */ 112 */
112 of_node_get(child); 113 of_node_get(child);
113 mdiodev->dev.of_node = child; 114 mdiodev->dev.of_node = child;
115 mdiodev->dev.fwnode = of_fwnode_handle(child);
114 116
115 /* All data is now stored in the mdiodev struct; register it. */ 117 /* All data is now stored in the mdiodev struct; register it. */
116 rc = mdio_device_register(mdiodev); 118 rc = mdio_device_register(mdiodev);
@@ -206,6 +208,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
206 mdio->phy_mask = ~0; 208 mdio->phy_mask = ~0;
207 209
208 mdio->dev.of_node = np; 210 mdio->dev.of_node = np;
211 mdio->dev.fwnode = of_fwnode_handle(np);
209 212
210 /* Get bus level PHY reset GPIO details */ 213 /* Get bus level PHY reset GPIO details */
211 mdio->reset_delay_us = DEFAULT_GPIO_RESET_DELAY; 214 mdio->reset_delay_us = DEFAULT_GPIO_RESET_DELAY;
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
index a25fed52f7e9..41b740aed3a3 100644
--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -1692,3 +1692,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask)
1692 iounmap(base_addr); 1692 iounmap(base_addr);
1693} 1693}
1694 1694
1695
1696/*
1697 * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
1698 * seems rushed, so that many built-in components simply don't work.
1699 * The following quirks disable the serial AUX port and the built-in ATI RV100
1700 * Radeon 7000 graphics card which both don't have any external connectors and
1701 * thus are useless, and even worse, e.g. the AUX port occupies ttyS0 and as
1702 * such makes those machines the only PARISC machines on which we can't use
1703 * ttyS0 as boot console.
1704 */
1705static void quirk_diva_ati_card(struct pci_dev *dev)
1706{
1707 if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
1708 dev->subsystem_device != 0x1292)
1709 return;
1710
1711 dev_info(&dev->dev, "Hiding Diva built-in ATI card");
1712 dev->device = 0;
1713}
1714DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
1715 quirk_diva_ati_card);
1716
1717static void quirk_diva_aux_disable(struct pci_dev *dev)
1718{
1719 if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
1720 dev->subsystem_device != 0x1291)
1721 return;
1722
1723 dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
1724 dev->device = 0;
1725}
1726DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
1727 quirk_diva_aux_disable);
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 04dac6a42c9f..6b8d060d07de 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -985,9 +985,7 @@ static u32 hv_compose_msi_req_v1(
985 int_pkt->wslot.slot = slot; 985 int_pkt->wslot.slot = slot;
986 int_pkt->int_desc.vector = vector; 986 int_pkt->int_desc.vector = vector;
987 int_pkt->int_desc.vector_count = 1; 987 int_pkt->int_desc.vector_count = 1;
988 int_pkt->int_desc.delivery_mode = 988 int_pkt->int_desc.delivery_mode = dest_Fixed;
989 (apic->irq_delivery_mode == dest_LowestPrio) ?
990 dest_LowestPrio : dest_Fixed;
991 989
992 /* 990 /*
993 * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in 991 * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in
@@ -1008,9 +1006,7 @@ static u32 hv_compose_msi_req_v2(
1008 int_pkt->wslot.slot = slot; 1006 int_pkt->wslot.slot = slot;
1009 int_pkt->int_desc.vector = vector; 1007 int_pkt->int_desc.vector = vector;
1010 int_pkt->int_desc.vector_count = 1; 1008 int_pkt->int_desc.vector_count = 1;
1011 int_pkt->int_desc.delivery_mode = 1009 int_pkt->int_desc.delivery_mode = dest_Fixed;
1012 (apic->irq_delivery_mode == dest_LowestPrio) ?
1013 dest_LowestPrio : dest_Fixed;
1014 1010
1015 /* 1011 /*
1016 * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten 1012 * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c
index 12796eccb2be..52ab3cb0a0bf 100644
--- a/drivers/pci/host/pcie-rcar.c
+++ b/drivers/pci/host/pcie-rcar.c
@@ -1128,12 +1128,12 @@ static int rcar_pcie_probe(struct platform_device *pdev)
1128 err = rcar_pcie_get_resources(pcie); 1128 err = rcar_pcie_get_resources(pcie);
1129 if (err < 0) { 1129 if (err < 0) {
1130 dev_err(dev, "failed to request resources: %d\n", err); 1130 dev_err(dev, "failed to request resources: %d\n", err);
1131 goto err_free_bridge; 1131 goto err_free_resource_list;
1132 } 1132 }
1133 1133
1134 err = rcar_pcie_parse_map_dma_ranges(pcie, dev->of_node); 1134 err = rcar_pcie_parse_map_dma_ranges(pcie, dev->of_node);
1135 if (err) 1135 if (err)
1136 goto err_free_bridge; 1136 goto err_free_resource_list;
1137 1137
1138 pm_runtime_enable(dev); 1138 pm_runtime_enable(dev);
1139 err = pm_runtime_get_sync(dev); 1139 err = pm_runtime_get_sync(dev);
@@ -1176,9 +1176,9 @@ err_pm_put:
1176err_pm_disable: 1176err_pm_disable:
1177 pm_runtime_disable(dev); 1177 pm_runtime_disable(dev);
1178 1178
1179err_free_bridge: 1179err_free_resource_list:
1180 pci_free_host_bridge(bridge);
1181 pci_free_resource_list(&pcie->resources); 1180 pci_free_resource_list(&pcie->resources);
1181 pci_free_host_bridge(bridge);
1182 1182
1183 return err; 1183 return err;
1184} 1184}
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 7f47bb72bf30..14fd865a5120 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -999,7 +999,7 @@ static int pci_pm_thaw_noirq(struct device *dev)
999 * the subsequent "thaw" callbacks for the device. 999 * the subsequent "thaw" callbacks for the device.
1000 */ 1000 */
1001 if (dev_pm_smart_suspend_and_suspended(dev)) { 1001 if (dev_pm_smart_suspend_and_suspended(dev)) {
1002 dev->power.direct_complete = true; 1002 dev_pm_skip_next_resume_phases(dev);
1003 return 0; 1003 return 0;
1004 } 1004 }
1005 1005
@@ -1012,7 +1012,12 @@ static int pci_pm_thaw_noirq(struct device *dev)
1012 if (pci_has_legacy_pm_support(pci_dev)) 1012 if (pci_has_legacy_pm_support(pci_dev))
1013 return pci_legacy_resume_early(dev); 1013 return pci_legacy_resume_early(dev);
1014 1014
1015 pci_update_current_state(pci_dev, PCI_D0); 1015 /*
1016 * pci_restore_state() requires the device to be in D0 (because of MSI
1017 * restoration among other things), so force it into D0 in case the
1018 * driver's "freeze" callbacks put it into a low-power state directly.
1019 */
1020 pci_set_power_state(pci_dev, PCI_D0);
1016 pci_restore_state(pci_dev); 1021 pci_restore_state(pci_dev);
1017 1022
1018 if (drv && drv->pm && drv->pm->thaw_noirq) 1023 if (drv && drv->pm && drv->pm->thaw_noirq)
diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c
index accaaaccb662..6601ad0dfb3a 100644
--- a/drivers/phy/motorola/phy-cpcap-usb.c
+++ b/drivers/phy/motorola/phy-cpcap-usb.c
@@ -310,7 +310,7 @@ static int cpcap_usb_init_irq(struct platform_device *pdev,
310 int irq, error; 310 int irq, error;
311 311
312 irq = platform_get_irq_byname(pdev, name); 312 irq = platform_get_irq_byname(pdev, name);
313 if (!irq) 313 if (irq < 0)
314 return -ENODEV; 314 return -ENODEV;
315 315
316 error = devm_request_threaded_irq(ddata->dev, irq, NULL, 316 error = devm_request_threaded_irq(ddata->dev, irq, NULL,
diff --git a/drivers/phy/renesas/Kconfig b/drivers/phy/renesas/Kconfig
index cb09245e9b4c..c845facacb06 100644
--- a/drivers/phy/renesas/Kconfig
+++ b/drivers/phy/renesas/Kconfig
@@ -12,7 +12,9 @@ config PHY_RCAR_GEN3_USB2
12 tristate "Renesas R-Car generation 3 USB 2.0 PHY driver" 12 tristate "Renesas R-Car generation 3 USB 2.0 PHY driver"
13 depends on ARCH_RENESAS 13 depends on ARCH_RENESAS
14 depends on EXTCON 14 depends on EXTCON
15 depends on USB_SUPPORT
15 select GENERIC_PHY 16 select GENERIC_PHY
17 select USB_COMMON
16 help 18 help
17 Support for USB 2.0 PHY found on Renesas R-Car generation 3 SoCs. 19 Support for USB 2.0 PHY found on Renesas R-Car generation 3 SoCs.
18 20
diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c
index ee85fa0ca4b0..7492c8978217 100644
--- a/drivers/phy/rockchip/phy-rockchip-typec.c
+++ b/drivers/phy/rockchip/phy-rockchip-typec.c
@@ -1137,6 +1137,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev)
1137 if (IS_ERR(phy)) { 1137 if (IS_ERR(phy)) {
1138 dev_err(dev, "failed to create phy: %s\n", 1138 dev_err(dev, "failed to create phy: %s\n",
1139 child_np->name); 1139 child_np->name);
1140 pm_runtime_disable(dev);
1140 return PTR_ERR(phy); 1141 return PTR_ERR(phy);
1141 } 1142 }
1142 1143
@@ -1146,6 +1147,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev)
1146 phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); 1147 phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
1147 if (IS_ERR(phy_provider)) { 1148 if (IS_ERR(phy_provider)) {
1148 dev_err(dev, "Failed to register phy provider\n"); 1149 dev_err(dev, "Failed to register phy provider\n");
1150 pm_runtime_disable(dev);
1149 return PTR_ERR(phy_provider); 1151 return PTR_ERR(phy_provider);
1150 } 1152 }
1151 1153
diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c
index 4307bf0013e1..63e916d4d069 100644
--- a/drivers/phy/tegra/xusb.c
+++ b/drivers/phy/tegra/xusb.c
@@ -75,14 +75,14 @@ MODULE_DEVICE_TABLE(of, tegra_xusb_padctl_of_match);
75static struct device_node * 75static struct device_node *
76tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name) 76tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name)
77{ 77{
78 /* 78 struct device_node *pads, *np;
79 * of_find_node_by_name() drops a reference, so make sure to grab one. 79
80 */ 80 pads = of_get_child_by_name(padctl->dev->of_node, "pads");
81 struct device_node *np = of_node_get(padctl->dev->of_node); 81 if (!pads)
82 return NULL;
82 83
83 np = of_find_node_by_name(np, "pads"); 84 np = of_get_child_by_name(pads, name);
84 if (np) 85 of_node_put(pads);
85 np = of_find_node_by_name(np, name);
86 86
87 return np; 87 return np;
88} 88}
@@ -90,16 +90,16 @@ tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name)
90static struct device_node * 90static struct device_node *
91tegra_xusb_pad_find_phy_node(struct tegra_xusb_pad *pad, unsigned int index) 91tegra_xusb_pad_find_phy_node(struct tegra_xusb_pad *pad, unsigned int index)
92{ 92{
93 /* 93 struct device_node *np, *lanes;
94 * of_find_node_by_name() drops a reference, so make sure to grab one.
95 */
96 struct device_node *np = of_node_get(pad->dev.of_node);
97 94
98 np = of_find_node_by_name(np, "lanes"); 95 lanes = of_get_child_by_name(pad->dev.of_node, "lanes");
99 if (!np) 96 if (!lanes)
100 return NULL; 97 return NULL;
101 98
102 return of_find_node_by_name(np, pad->soc->lanes[index].name); 99 np = of_get_child_by_name(lanes, pad->soc->lanes[index].name);
100 of_node_put(lanes);
101
102 return np;
103} 103}
104 104
105static int 105static int
@@ -195,7 +195,7 @@ int tegra_xusb_pad_register(struct tegra_xusb_pad *pad,
195 unsigned int i; 195 unsigned int i;
196 int err; 196 int err;
197 197
198 children = of_find_node_by_name(pad->dev.of_node, "lanes"); 198 children = of_get_child_by_name(pad->dev.of_node, "lanes");
199 if (!children) 199 if (!children)
200 return -ENODEV; 200 return -ENODEV;
201 201
@@ -444,21 +444,21 @@ static struct device_node *
444tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type, 444tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type,
445 unsigned int index) 445 unsigned int index)
446{ 446{
447 /* 447 struct device_node *ports, *np;
448 * of_find_node_by_name() drops a reference, so make sure to grab one. 448 char *name;
449 */
450 struct device_node *np = of_node_get(padctl->dev->of_node);
451 449
452 np = of_find_node_by_name(np, "ports"); 450 ports = of_get_child_by_name(padctl->dev->of_node, "ports");
453 if (np) { 451 if (!ports)
454 char *name; 452 return NULL;
455 453
456 name = kasprintf(GFP_KERNEL, "%s-%u", type, index); 454 name = kasprintf(GFP_KERNEL, "%s-%u", type, index);
457 if (!name) 455 if (!name) {
458 return ERR_PTR(-ENOMEM); 456 of_node_put(ports);
459 np = of_find_node_by_name(np, name); 457 return ERR_PTR(-ENOMEM);
460 kfree(name);
461 } 458 }
459 np = of_get_child_by_name(ports, name);
460 kfree(name);
461 of_node_put(ports);
462 462
463 return np; 463 return np;
464} 464}
@@ -847,7 +847,7 @@ static void tegra_xusb_remove_ports(struct tegra_xusb_padctl *padctl)
847 847
848static int tegra_xusb_padctl_probe(struct platform_device *pdev) 848static int tegra_xusb_padctl_probe(struct platform_device *pdev)
849{ 849{
850 struct device_node *np = of_node_get(pdev->dev.of_node); 850 struct device_node *np = pdev->dev.of_node;
851 const struct tegra_xusb_padctl_soc *soc; 851 const struct tegra_xusb_padctl_soc *soc;
852 struct tegra_xusb_padctl *padctl; 852 struct tegra_xusb_padctl *padctl;
853 const struct of_device_id *match; 853 const struct of_device_id *match;
@@ -855,7 +855,7 @@ static int tegra_xusb_padctl_probe(struct platform_device *pdev)
855 int err; 855 int err;
856 856
857 /* for backwards compatibility with old device trees */ 857 /* for backwards compatibility with old device trees */
858 np = of_find_node_by_name(np, "pads"); 858 np = of_get_child_by_name(np, "pads");
859 if (!np) { 859 if (!np) {
860 dev_warn(&pdev->dev, "deprecated DT, using legacy driver\n"); 860 dev_warn(&pdev->dev, "deprecated DT, using legacy driver\n");
861 return tegra_xusb_padctl_legacy_probe(pdev); 861 return tegra_xusb_padctl_legacy_probe(pdev);
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index bdedb6325c72..4471fd94e1fe 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
1620 clear_bit(i, chip->irq.valid_mask); 1620 clear_bit(i, chip->irq.valid_mask);
1621 } 1621 }
1622 1622
1623 /*
1624 * The same set of machines in chv_no_valid_mask[] have incorrectly
1625 * configured GPIOs that generate spurious interrupts so we use
1626 * this same list to apply another quirk for them.
1627 *
1628 * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953.
1629 */
1630 if (!need_valid_mask) {
1631 /*
1632 * Mask all interrupts the community is able to generate
1633 * but leave the ones that can only generate GPEs unmasked.
1634 */
1635 chv_writel(GENMASK(31, pctrl->community->nirqs),
1636 pctrl->regs + CHV_INTMASK);
1637 }
1638
1623 /* Clear all interrupts */ 1639 /* Clear all interrupts */
1624 chv_writel(0xffff, pctrl->regs + CHV_INTSTAT); 1640 chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
1625 1641
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index e6cd8de793e2..3501491e5bfc 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -222,6 +222,9 @@ static enum pin_config_param pcs_bias[] = {
222 */ 222 */
223static struct lock_class_key pcs_lock_class; 223static struct lock_class_key pcs_lock_class;
224 224
225/* Class for the IRQ request mutex */
226static struct lock_class_key pcs_request_class;
227
225/* 228/*
226 * REVISIT: Reads and writes could eventually use regmap or something 229 * REVISIT: Reads and writes could eventually use regmap or something
227 * generic. But at least on omaps, some mux registers are performance 230 * generic. But at least on omaps, some mux registers are performance
@@ -1486,7 +1489,7 @@ static int pcs_irqdomain_map(struct irq_domain *d, unsigned int irq,
1486 irq_set_chip_data(irq, pcs_soc); 1489 irq_set_chip_data(irq, pcs_soc);
1487 irq_set_chip_and_handler(irq, &pcs->chip, 1490 irq_set_chip_and_handler(irq, &pcs->chip,
1488 handle_level_irq); 1491 handle_level_irq);
1489 irq_set_lockdep_class(irq, &pcs_lock_class); 1492 irq_set_lockdep_class(irq, &pcs_lock_class, &pcs_request_class);
1490 irq_set_noprobe(irq); 1493 irq_set_noprobe(irq);
1491 1494
1492 return 0; 1495 return 0;
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index a276c61be217..e62ab087bfd8 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -290,7 +290,7 @@ static int stm32_gpio_domain_translate(struct irq_domain *d,
290} 290}
291 291
292static int stm32_gpio_domain_activate(struct irq_domain *d, 292static int stm32_gpio_domain_activate(struct irq_domain *d,
293 struct irq_data *irq_data, bool early) 293 struct irq_data *irq_data, bool reserve)
294{ 294{
295 struct stm32_gpio_bank *bank = d->host_data; 295 struct stm32_gpio_bank *bank = d->host_data;
296 struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent); 296 struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent);
diff --git a/drivers/platform/x86/asus-wireless.c b/drivers/platform/x86/asus-wireless.c
index f3796164329e..d4aeac3477f5 100644
--- a/drivers/platform/x86/asus-wireless.c
+++ b/drivers/platform/x86/asus-wireless.c
@@ -118,6 +118,7 @@ static void asus_wireless_notify(struct acpi_device *adev, u32 event)
118 return; 118 return;
119 } 119 }
120 input_report_key(data->idev, KEY_RFKILL, 1); 120 input_report_key(data->idev, KEY_RFKILL, 1);
121 input_sync(data->idev);
121 input_report_key(data->idev, KEY_RFKILL, 0); 122 input_report_key(data->idev, KEY_RFKILL, 0);
122 input_sync(data->idev); 123 input_sync(data->idev);
123} 124}
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index bf897b1832b1..cd4725e7e0b5 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -37,6 +37,7 @@
37 37
38struct quirk_entry { 38struct quirk_entry {
39 u8 touchpad_led; 39 u8 touchpad_led;
40 u8 kbd_led_levels_off_1;
40 41
41 int needs_kbd_timeouts; 42 int needs_kbd_timeouts;
42 /* 43 /*
@@ -67,6 +68,10 @@ static struct quirk_entry quirk_dell_xps13_9333 = {
67 .kbd_timeouts = { 0, 5, 15, 60, 5 * 60, 15 * 60, -1 }, 68 .kbd_timeouts = { 0, 5, 15, 60, 5 * 60, 15 * 60, -1 },
68}; 69};
69 70
71static struct quirk_entry quirk_dell_latitude_e6410 = {
72 .kbd_led_levels_off_1 = 1,
73};
74
70static struct platform_driver platform_driver = { 75static struct platform_driver platform_driver = {
71 .driver = { 76 .driver = {
72 .name = "dell-laptop", 77 .name = "dell-laptop",
@@ -269,6 +274,15 @@ static const struct dmi_system_id dell_quirks[] __initconst = {
269 }, 274 },
270 .driver_data = &quirk_dell_xps13_9333, 275 .driver_data = &quirk_dell_xps13_9333,
271 }, 276 },
277 {
278 .callback = dmi_matched,
279 .ident = "Dell Latitude E6410",
280 .matches = {
281 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
282 DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6410"),
283 },
284 .driver_data = &quirk_dell_latitude_e6410,
285 },
272 { } 286 { }
273}; 287};
274 288
@@ -1149,6 +1163,9 @@ static int kbd_get_info(struct kbd_info *info)
1149 units = (buffer->output[2] >> 8) & 0xFF; 1163 units = (buffer->output[2] >> 8) & 0xFF;
1150 info->levels = (buffer->output[2] >> 16) & 0xFF; 1164 info->levels = (buffer->output[2] >> 16) & 0xFF;
1151 1165
1166 if (quirks && quirks->kbd_led_levels_off_1 && info->levels)
1167 info->levels--;
1168
1152 if (units & BIT(0)) 1169 if (units & BIT(0))
1153 info->seconds = (buffer->output[3] >> 0) & 0xFF; 1170 info->seconds = (buffer->output[3] >> 0) & 0xFF;
1154 if (units & BIT(1)) 1171 if (units & BIT(1))
diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index 39d2f4518483..fb25b20df316 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c
@@ -639,6 +639,8 @@ static int dell_wmi_events_set_enabled(bool enable)
639 int ret; 639 int ret;
640 640
641 buffer = kzalloc(sizeof(struct calling_interface_buffer), GFP_KERNEL); 641 buffer = kzalloc(sizeof(struct calling_interface_buffer), GFP_KERNEL);
642 if (!buffer)
643 return -ENOMEM;
642 buffer->cmd_class = CLASS_INFO; 644 buffer->cmd_class = CLASS_INFO;
643 buffer->cmd_select = SELECT_APP_REGISTRATION; 645 buffer->cmd_select = SELECT_APP_REGISTRATION;
644 buffer->input[0] = 0x10000; 646 buffer->input[0] = 0x10000;
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 15015a24f8ad..badf42acbf95 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -565,9 +565,9 @@ enum qeth_cq {
565}; 565};
566 566
567struct qeth_ipato { 567struct qeth_ipato {
568 int enabled; 568 bool enabled;
569 int invert4; 569 bool invert4;
570 int invert6; 570 bool invert6;
571 struct list_head entries; 571 struct list_head entries;
572}; 572};
573 573
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 430e3214f7e2..3614df68830f 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1480,9 +1480,9 @@ static int qeth_setup_card(struct qeth_card *card)
1480 qeth_set_intial_options(card); 1480 qeth_set_intial_options(card);
1481 /* IP address takeover */ 1481 /* IP address takeover */
1482 INIT_LIST_HEAD(&card->ipato.entries); 1482 INIT_LIST_HEAD(&card->ipato.entries);
1483 card->ipato.enabled = 0; 1483 card->ipato.enabled = false;
1484 card->ipato.invert4 = 0; 1484 card->ipato.invert4 = false;
1485 card->ipato.invert6 = 0; 1485 card->ipato.invert6 = false;
1486 /* init QDIO stuff */ 1486 /* init QDIO stuff */
1487 qeth_init_qdio_info(card); 1487 qeth_init_qdio_info(card);
1488 INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work); 1488 INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work);
@@ -5386,6 +5386,13 @@ out:
5386} 5386}
5387EXPORT_SYMBOL_GPL(qeth_poll); 5387EXPORT_SYMBOL_GPL(qeth_poll);
5388 5388
5389static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd)
5390{
5391 if (!cmd->hdr.return_code)
5392 cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
5393 return cmd->hdr.return_code;
5394}
5395
5389int qeth_setassparms_cb(struct qeth_card *card, 5396int qeth_setassparms_cb(struct qeth_card *card,
5390 struct qeth_reply *reply, unsigned long data) 5397 struct qeth_reply *reply, unsigned long data)
5391{ 5398{
@@ -6242,7 +6249,7 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card,
6242 (struct qeth_checksum_cmd *)reply->param; 6249 (struct qeth_checksum_cmd *)reply->param;
6243 6250
6244 QETH_CARD_TEXT(card, 4, "chkdoccb"); 6251 QETH_CARD_TEXT(card, 4, "chkdoccb");
6245 if (cmd->hdr.return_code) 6252 if (qeth_setassparms_inspect_rc(cmd))
6246 return 0; 6253 return 0;
6247 6254
6248 memset(chksum_cb, 0, sizeof(*chksum_cb)); 6255 memset(chksum_cb, 0, sizeof(*chksum_cb));
diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h
index 194ae9b577cc..e5833837b799 100644
--- a/drivers/s390/net/qeth_l3.h
+++ b/drivers/s390/net/qeth_l3.h
@@ -82,7 +82,7 @@ void qeth_l3_del_vipa(struct qeth_card *, enum qeth_prot_versions, const u8 *);
82int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *); 82int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *);
83void qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions, 83void qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions,
84 const u8 *); 84 const u8 *);
85int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *, struct qeth_ipaddr *); 85void qeth_l3_update_ipato(struct qeth_card *card);
86struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions); 86struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions);
87int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *); 87int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *);
88int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *); 88int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 6a73894b0cb5..ef0961e18686 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -164,8 +164,8 @@ static void qeth_l3_convert_addr_to_bits(u8 *addr, u8 *bits, int len)
164 } 164 }
165} 165}
166 166
167int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, 167static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
168 struct qeth_ipaddr *addr) 168 struct qeth_ipaddr *addr)
169{ 169{
170 struct qeth_ipato_entry *ipatoe; 170 struct qeth_ipato_entry *ipatoe;
171 u8 addr_bits[128] = {0, }; 171 u8 addr_bits[128] = {0, };
@@ -174,6 +174,8 @@ int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
174 174
175 if (!card->ipato.enabled) 175 if (!card->ipato.enabled)
176 return 0; 176 return 0;
177 if (addr->type != QETH_IP_TYPE_NORMAL)
178 return 0;
177 179
178 qeth_l3_convert_addr_to_bits((u8 *) &addr->u, addr_bits, 180 qeth_l3_convert_addr_to_bits((u8 *) &addr->u, addr_bits,
179 (addr->proto == QETH_PROT_IPV4)? 4:16); 181 (addr->proto == QETH_PROT_IPV4)? 4:16);
@@ -290,8 +292,7 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
290 memcpy(addr, tmp_addr, sizeof(struct qeth_ipaddr)); 292 memcpy(addr, tmp_addr, sizeof(struct qeth_ipaddr));
291 addr->ref_counter = 1; 293 addr->ref_counter = 1;
292 294
293 if (addr->type == QETH_IP_TYPE_NORMAL && 295 if (qeth_l3_is_addr_covered_by_ipato(card, addr)) {
294 qeth_l3_is_addr_covered_by_ipato(card, addr)) {
295 QETH_CARD_TEXT(card, 2, "tkovaddr"); 296 QETH_CARD_TEXT(card, 2, "tkovaddr");
296 addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; 297 addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
297 } 298 }
@@ -605,6 +606,27 @@ int qeth_l3_setrouting_v6(struct qeth_card *card)
605/* 606/*
606 * IP address takeover related functions 607 * IP address takeover related functions
607 */ 608 */
609
610/**
611 * qeth_l3_update_ipato() - Update 'takeover' property, for all NORMAL IPs.
612 *
613 * Caller must hold ip_lock.
614 */
615void qeth_l3_update_ipato(struct qeth_card *card)
616{
617 struct qeth_ipaddr *addr;
618 unsigned int i;
619
620 hash_for_each(card->ip_htable, i, addr, hnode) {
621 if (addr->type != QETH_IP_TYPE_NORMAL)
622 continue;
623 if (qeth_l3_is_addr_covered_by_ipato(card, addr))
624 addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
625 else
626 addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG;
627 }
628}
629
608static void qeth_l3_clear_ipato_list(struct qeth_card *card) 630static void qeth_l3_clear_ipato_list(struct qeth_card *card)
609{ 631{
610 struct qeth_ipato_entry *ipatoe, *tmp; 632 struct qeth_ipato_entry *ipatoe, *tmp;
@@ -616,6 +638,7 @@ static void qeth_l3_clear_ipato_list(struct qeth_card *card)
616 kfree(ipatoe); 638 kfree(ipatoe);
617 } 639 }
618 640
641 qeth_l3_update_ipato(card);
619 spin_unlock_bh(&card->ip_lock); 642 spin_unlock_bh(&card->ip_lock);
620} 643}
621 644
@@ -640,8 +663,10 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card,
640 } 663 }
641 } 664 }
642 665
643 if (!rc) 666 if (!rc) {
644 list_add_tail(&new->entry, &card->ipato.entries); 667 list_add_tail(&new->entry, &card->ipato.entries);
668 qeth_l3_update_ipato(card);
669 }
645 670
646 spin_unlock_bh(&card->ip_lock); 671 spin_unlock_bh(&card->ip_lock);
647 672
@@ -664,6 +689,7 @@ void qeth_l3_del_ipato_entry(struct qeth_card *card,
664 (proto == QETH_PROT_IPV4)? 4:16) && 689 (proto == QETH_PROT_IPV4)? 4:16) &&
665 (ipatoe->mask_bits == mask_bits)) { 690 (ipatoe->mask_bits == mask_bits)) {
666 list_del(&ipatoe->entry); 691 list_del(&ipatoe->entry);
692 qeth_l3_update_ipato(card);
667 kfree(ipatoe); 693 kfree(ipatoe);
668 } 694 }
669 } 695 }
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index bd12fdf678be..6ea2b528a64e 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -370,8 +370,8 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev,
370 struct device_attribute *attr, const char *buf, size_t count) 370 struct device_attribute *attr, const char *buf, size_t count)
371{ 371{
372 struct qeth_card *card = dev_get_drvdata(dev); 372 struct qeth_card *card = dev_get_drvdata(dev);
373 struct qeth_ipaddr *addr; 373 bool enable;
374 int i, rc = 0; 374 int rc = 0;
375 375
376 if (!card) 376 if (!card)
377 return -EINVAL; 377 return -EINVAL;
@@ -384,25 +384,18 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev,
384 } 384 }
385 385
386 if (sysfs_streq(buf, "toggle")) { 386 if (sysfs_streq(buf, "toggle")) {
387 card->ipato.enabled = (card->ipato.enabled)? 0 : 1; 387 enable = !card->ipato.enabled;
388 } else if (sysfs_streq(buf, "1")) { 388 } else if (kstrtobool(buf, &enable)) {
389 card->ipato.enabled = 1;
390 hash_for_each(card->ip_htable, i, addr, hnode) {
391 if ((addr->type == QETH_IP_TYPE_NORMAL) &&
392 qeth_l3_is_addr_covered_by_ipato(card, addr))
393 addr->set_flags |=
394 QETH_IPA_SETIP_TAKEOVER_FLAG;
395 }
396 } else if (sysfs_streq(buf, "0")) {
397 card->ipato.enabled = 0;
398 hash_for_each(card->ip_htable, i, addr, hnode) {
399 if (addr->set_flags &
400 QETH_IPA_SETIP_TAKEOVER_FLAG)
401 addr->set_flags &=
402 ~QETH_IPA_SETIP_TAKEOVER_FLAG;
403 }
404 } else
405 rc = -EINVAL; 389 rc = -EINVAL;
390 goto out;
391 }
392
393 if (card->ipato.enabled != enable) {
394 card->ipato.enabled = enable;
395 spin_lock_bh(&card->ip_lock);
396 qeth_l3_update_ipato(card);
397 spin_unlock_bh(&card->ip_lock);
398 }
406out: 399out:
407 mutex_unlock(&card->conf_mutex); 400 mutex_unlock(&card->conf_mutex);
408 return rc ? rc : count; 401 return rc ? rc : count;
@@ -428,20 +421,27 @@ static ssize_t qeth_l3_dev_ipato_invert4_store(struct device *dev,
428 const char *buf, size_t count) 421 const char *buf, size_t count)
429{ 422{
430 struct qeth_card *card = dev_get_drvdata(dev); 423 struct qeth_card *card = dev_get_drvdata(dev);
424 bool invert;
431 int rc = 0; 425 int rc = 0;
432 426
433 if (!card) 427 if (!card)
434 return -EINVAL; 428 return -EINVAL;
435 429
436 mutex_lock(&card->conf_mutex); 430 mutex_lock(&card->conf_mutex);
437 if (sysfs_streq(buf, "toggle")) 431 if (sysfs_streq(buf, "toggle")) {
438 card->ipato.invert4 = (card->ipato.invert4)? 0 : 1; 432 invert = !card->ipato.invert4;
439 else if (sysfs_streq(buf, "1")) 433 } else if (kstrtobool(buf, &invert)) {
440 card->ipato.invert4 = 1;
441 else if (sysfs_streq(buf, "0"))
442 card->ipato.invert4 = 0;
443 else
444 rc = -EINVAL; 434 rc = -EINVAL;
435 goto out;
436 }
437
438 if (card->ipato.invert4 != invert) {
439 card->ipato.invert4 = invert;
440 spin_lock_bh(&card->ip_lock);
441 qeth_l3_update_ipato(card);
442 spin_unlock_bh(&card->ip_lock);
443 }
444out:
445 mutex_unlock(&card->conf_mutex); 445 mutex_unlock(&card->conf_mutex);
446 return rc ? rc : count; 446 return rc ? rc : count;
447} 447}
@@ -607,20 +607,27 @@ static ssize_t qeth_l3_dev_ipato_invert6_store(struct device *dev,
607 struct device_attribute *attr, const char *buf, size_t count) 607 struct device_attribute *attr, const char *buf, size_t count)
608{ 608{
609 struct qeth_card *card = dev_get_drvdata(dev); 609 struct qeth_card *card = dev_get_drvdata(dev);
610 bool invert;
610 int rc = 0; 611 int rc = 0;
611 612
612 if (!card) 613 if (!card)
613 return -EINVAL; 614 return -EINVAL;
614 615
615 mutex_lock(&card->conf_mutex); 616 mutex_lock(&card->conf_mutex);
616 if (sysfs_streq(buf, "toggle")) 617 if (sysfs_streq(buf, "toggle")) {
617 card->ipato.invert6 = (card->ipato.invert6)? 0 : 1; 618 invert = !card->ipato.invert6;
618 else if (sysfs_streq(buf, "1")) 619 } else if (kstrtobool(buf, &invert)) {
619 card->ipato.invert6 = 1;
620 else if (sysfs_streq(buf, "0"))
621 card->ipato.invert6 = 0;
622 else
623 rc = -EINVAL; 620 rc = -EINVAL;
621 goto out;
622 }
623
624 if (card->ipato.invert6 != invert) {
625 card->ipato.invert6 = invert;
626 spin_lock_bh(&card->ip_lock);
627 qeth_l3_update_ipato(card);
628 spin_unlock_bh(&card->ip_lock);
629 }
630out:
624 mutex_unlock(&card->conf_mutex); 631 mutex_unlock(&card->conf_mutex);
625 return rc ? rc : count; 632 return rc ? rc : count;
626} 633}
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 6e3d81969a77..d52265416da2 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1725,6 +1725,7 @@ struct aac_dev
1725#define FIB_CONTEXT_FLAG_NATIVE_HBA (0x00000010) 1725#define FIB_CONTEXT_FLAG_NATIVE_HBA (0x00000010)
1726#define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF (0x00000020) 1726#define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF (0x00000020)
1727#define FIB_CONTEXT_FLAG_SCSI_CMD (0x00000040) 1727#define FIB_CONTEXT_FLAG_SCSI_CMD (0x00000040)
1728#define FIB_CONTEXT_FLAG_EH_RESET (0x00000080)
1728 1729
1729/* 1730/*
1730 * Define the command values 1731 * Define the command values
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index bec9f3193f60..80a8cb26cdea 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -2482,8 +2482,8 @@ int aac_command_thread(void *data)
2482 /* Synchronize our watches */ 2482 /* Synchronize our watches */
2483 if (((NSEC_PER_SEC - (NSEC_PER_SEC / HZ)) > now.tv_nsec) 2483 if (((NSEC_PER_SEC - (NSEC_PER_SEC / HZ)) > now.tv_nsec)
2484 && (now.tv_nsec > (NSEC_PER_SEC / HZ))) 2484 && (now.tv_nsec > (NSEC_PER_SEC / HZ)))
2485 difference = (((NSEC_PER_SEC - now.tv_nsec) * HZ) 2485 difference = HZ + HZ / 2 -
2486 + NSEC_PER_SEC / 2) / NSEC_PER_SEC; 2486 now.tv_nsec / (NSEC_PER_SEC / HZ);
2487 else { 2487 else {
2488 if (now.tv_nsec > NSEC_PER_SEC / 2) 2488 if (now.tv_nsec > NSEC_PER_SEC / 2)
2489 ++now.tv_sec; 2489 ++now.tv_sec;
@@ -2507,6 +2507,10 @@ int aac_command_thread(void *data)
2507 if (kthread_should_stop()) 2507 if (kthread_should_stop())
2508 break; 2508 break;
2509 2509
2510 /*
2511 * we probably want usleep_range() here instead of the
2512 * jiffies computation
2513 */
2510 schedule_timeout(difference); 2514 schedule_timeout(difference);
2511 2515
2512 if (kthread_should_stop()) 2516 if (kthread_should_stop())
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index bdf127aaab41..d55332de08f9 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -1037,7 +1037,7 @@ static int aac_eh_bus_reset(struct scsi_cmnd* cmd)
1037 info = &aac->hba_map[bus][cid]; 1037 info = &aac->hba_map[bus][cid];
1038 if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS || 1038 if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
1039 info->devtype != AAC_DEVTYPE_NATIVE_RAW) { 1039 info->devtype != AAC_DEVTYPE_NATIVE_RAW) {
1040 fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT; 1040 fib->flags |= FIB_CONTEXT_FLAG_EH_RESET;
1041 cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER; 1041 cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
1042 } 1042 }
1043 } 1043 }
diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c
index 72ca2a2e08e2..b2fa195adc7a 100644
--- a/drivers/scsi/bfa/bfad_bsg.c
+++ b/drivers/scsi/bfa/bfad_bsg.c
@@ -3135,7 +3135,8 @@ bfad_im_bsg_vendor_request(struct bsg_job *job)
3135 struct fc_bsg_request *bsg_request = job->request; 3135 struct fc_bsg_request *bsg_request = job->request;
3136 struct fc_bsg_reply *bsg_reply = job->reply; 3136 struct fc_bsg_reply *bsg_reply = job->reply;
3137 uint32_t vendor_cmd = bsg_request->rqst_data.h_vendor.vendor_cmd[0]; 3137 uint32_t vendor_cmd = bsg_request->rqst_data.h_vendor.vendor_cmd[0];
3138 struct bfad_im_port_s *im_port = shost_priv(fc_bsg_to_shost(job)); 3138 struct Scsi_Host *shost = fc_bsg_to_shost(job);
3139 struct bfad_im_port_s *im_port = bfad_get_im_port(shost);
3139 struct bfad_s *bfad = im_port->bfad; 3140 struct bfad_s *bfad = im_port->bfad;
3140 void *payload_kbuf; 3141 void *payload_kbuf;
3141 int rc = -EINVAL; 3142 int rc = -EINVAL;
@@ -3350,7 +3351,8 @@ int
3350bfad_im_bsg_els_ct_request(struct bsg_job *job) 3351bfad_im_bsg_els_ct_request(struct bsg_job *job)
3351{ 3352{
3352 struct bfa_bsg_data *bsg_data; 3353 struct bfa_bsg_data *bsg_data;
3353 struct bfad_im_port_s *im_port = shost_priv(fc_bsg_to_shost(job)); 3354 struct Scsi_Host *shost = fc_bsg_to_shost(job);
3355 struct bfad_im_port_s *im_port = bfad_get_im_port(shost);
3354 struct bfad_s *bfad = im_port->bfad; 3356 struct bfad_s *bfad = im_port->bfad;
3355 bfa_bsg_fcpt_t *bsg_fcpt; 3357 bfa_bsg_fcpt_t *bsg_fcpt;
3356 struct bfad_fcxp *drv_fcxp; 3358 struct bfad_fcxp *drv_fcxp;
diff --git a/drivers/scsi/bfa/bfad_im.c b/drivers/scsi/bfa/bfad_im.c
index 24e657a4ec80..c05d6e91e4bd 100644
--- a/drivers/scsi/bfa/bfad_im.c
+++ b/drivers/scsi/bfa/bfad_im.c
@@ -546,6 +546,7 @@ int
546bfad_im_scsi_host_alloc(struct bfad_s *bfad, struct bfad_im_port_s *im_port, 546bfad_im_scsi_host_alloc(struct bfad_s *bfad, struct bfad_im_port_s *im_port,
547 struct device *dev) 547 struct device *dev)
548{ 548{
549 struct bfad_im_port_pointer *im_portp;
549 int error = 1; 550 int error = 1;
550 551
551 mutex_lock(&bfad_mutex); 552 mutex_lock(&bfad_mutex);
@@ -564,7 +565,8 @@ bfad_im_scsi_host_alloc(struct bfad_s *bfad, struct bfad_im_port_s *im_port,
564 goto out_free_idr; 565 goto out_free_idr;
565 } 566 }
566 567
567 im_port->shost->hostdata[0] = (unsigned long)im_port; 568 im_portp = shost_priv(im_port->shost);
569 im_portp->p = im_port;
568 im_port->shost->unique_id = im_port->idr_id; 570 im_port->shost->unique_id = im_port->idr_id;
569 im_port->shost->this_id = -1; 571 im_port->shost->this_id = -1;
570 im_port->shost->max_id = MAX_FCP_TARGET; 572 im_port->shost->max_id = MAX_FCP_TARGET;
@@ -748,7 +750,7 @@ bfad_scsi_host_alloc(struct bfad_im_port_s *im_port, struct bfad_s *bfad)
748 750
749 sht->sg_tablesize = bfad->cfg_data.io_max_sge; 751 sht->sg_tablesize = bfad->cfg_data.io_max_sge;
750 752
751 return scsi_host_alloc(sht, sizeof(unsigned long)); 753 return scsi_host_alloc(sht, sizeof(struct bfad_im_port_pointer));
752} 754}
753 755
754void 756void
diff --git a/drivers/scsi/bfa/bfad_im.h b/drivers/scsi/bfa/bfad_im.h
index c81ec2a77ef5..06ce4ba2b7bc 100644
--- a/drivers/scsi/bfa/bfad_im.h
+++ b/drivers/scsi/bfa/bfad_im.h
@@ -69,6 +69,16 @@ struct bfad_im_port_s {
69 struct fc_vport *fc_vport; 69 struct fc_vport *fc_vport;
70}; 70};
71 71
72struct bfad_im_port_pointer {
73 struct bfad_im_port_s *p;
74};
75
76static inline struct bfad_im_port_s *bfad_get_im_port(struct Scsi_Host *host)
77{
78 struct bfad_im_port_pointer *im_portp = shost_priv(host);
79 return im_portp->p;
80}
81
72enum bfad_itnim_state { 82enum bfad_itnim_state {
73 ITNIM_STATE_NONE, 83 ITNIM_STATE_NONE,
74 ITNIM_STATE_ONLINE, 84 ITNIM_STATE_ONLINE,
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index 5da46052e179..21be672679fb 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -904,10 +904,14 @@ static void fc_lport_recv_els_req(struct fc_lport *lport,
904 case ELS_FLOGI: 904 case ELS_FLOGI:
905 if (!lport->point_to_multipoint) 905 if (!lport->point_to_multipoint)
906 fc_lport_recv_flogi_req(lport, fp); 906 fc_lport_recv_flogi_req(lport, fp);
907 else
908 fc_rport_recv_req(lport, fp);
907 break; 909 break;
908 case ELS_LOGO: 910 case ELS_LOGO:
909 if (fc_frame_sid(fp) == FC_FID_FLOGI) 911 if (fc_frame_sid(fp) == FC_FID_FLOGI)
910 fc_lport_recv_logo_req(lport, fp); 912 fc_lport_recv_logo_req(lport, fp);
913 else
914 fc_rport_recv_req(lport, fp);
911 break; 915 break;
912 case ELS_RSCN: 916 case ELS_RSCN:
913 lport->tt.disc_recv_req(lport, fp); 917 lport->tt.disc_recv_req(lport, fp);
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index ca1566237ae7..3183d63de4da 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -2145,7 +2145,7 @@ void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
2145 struct sas_rphy *rphy) 2145 struct sas_rphy *rphy)
2146{ 2146{
2147 struct domain_device *dev; 2147 struct domain_device *dev;
2148 unsigned int reslen = 0; 2148 unsigned int rcvlen = 0;
2149 int ret = -EINVAL; 2149 int ret = -EINVAL;
2150 2150
2151 /* no rphy means no smp target support (ie aic94xx host) */ 2151 /* no rphy means no smp target support (ie aic94xx host) */
@@ -2179,12 +2179,12 @@ void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
2179 2179
2180 ret = smp_execute_task_sg(dev, job->request_payload.sg_list, 2180 ret = smp_execute_task_sg(dev, job->request_payload.sg_list,
2181 job->reply_payload.sg_list); 2181 job->reply_payload.sg_list);
2182 if (ret > 0) { 2182 if (ret >= 0) {
2183 /* positive number is the untransferred residual */ 2183 /* bsg_job_done() requires the length received */
2184 reslen = ret; 2184 rcvlen = job->reply_payload.payload_len - ret;
2185 ret = 0; 2185 ret = 0;
2186 } 2186 }
2187 2187
2188out: 2188out:
2189 bsg_job_done(job, ret, reslen); 2189 bsg_job_done(job, ret, rcvlen);
2190} 2190}
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 56faeb049b4a..87c08ff37ddd 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -753,12 +753,12 @@ lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp)
753 drqe.address_hi = putPaddrHigh(rqb_entry->dbuf.phys); 753 drqe.address_hi = putPaddrHigh(rqb_entry->dbuf.phys);
754 rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe); 754 rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe);
755 if (rc < 0) { 755 if (rc < 0) {
756 (rqbp->rqb_free_buffer)(phba, rqb_entry);
757 lpfc_printf_log(phba, KERN_ERR, LOG_INIT, 756 lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
758 "6409 Cannot post to RQ %d: %x %x\n", 757 "6409 Cannot post to RQ %d: %x %x\n",
759 rqb_entry->hrq->queue_id, 758 rqb_entry->hrq->queue_id,
760 rqb_entry->hrq->host_index, 759 rqb_entry->hrq->host_index,
761 rqb_entry->hrq->hba_index); 760 rqb_entry->hrq->hba_index);
761 (rqbp->rqb_free_buffer)(phba, rqb_entry);
762 } else { 762 } else {
763 list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list); 763 list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list);
764 rqbp->buffer_count++; 764 rqbp->buffer_count++;
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index a4f28b7e4c65..e18877177f1b 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
1576 return req; 1576 return req;
1577 1577
1578 for_each_bio(bio) { 1578 for_each_bio(bio) {
1579 ret = blk_rq_append_bio(req, bio); 1579 struct bio *bounce_bio = bio;
1580
1581 ret = blk_rq_append_bio(req, &bounce_bio);
1580 if (ret) 1582 if (ret)
1581 return ERR_PTR(ret); 1583 return ERR_PTR(ret);
1582 } 1584 }
diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c
index 01f08c03f2c1..c3765d29fd3f 100644
--- a/drivers/scsi/scsi_debugfs.c
+++ b/drivers/scsi/scsi_debugfs.c
@@ -8,9 +8,11 @@ void scsi_show_rq(struct seq_file *m, struct request *rq)
8{ 8{
9 struct scsi_cmnd *cmd = container_of(scsi_req(rq), typeof(*cmd), req); 9 struct scsi_cmnd *cmd = container_of(scsi_req(rq), typeof(*cmd), req);
10 int msecs = jiffies_to_msecs(jiffies - cmd->jiffies_at_alloc); 10 int msecs = jiffies_to_msecs(jiffies - cmd->jiffies_at_alloc);
11 char buf[80]; 11 const u8 *const cdb = READ_ONCE(cmd->cmnd);
12 char buf[80] = "(?)";
12 13
13 __scsi_format_command(buf, sizeof(buf), cmd->cmnd, cmd->cmd_len); 14 if (cdb)
15 __scsi_format_command(buf, sizeof(buf), cdb, cmd->cmd_len);
14 seq_printf(m, ", .cmd=%s, .retries=%d, allocated %d.%03d s ago", buf, 16 seq_printf(m, ", .cmd=%s, .retries=%d, allocated %d.%03d s ago", buf,
15 cmd->retries, msecs / 1000, msecs % 1000); 17 cmd->retries, msecs / 1000, msecs % 1000);
16} 18}
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index 78d4aa8df675..dfb8da83fa50 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -34,7 +34,6 @@ struct scsi_dev_info_list_table {
34}; 34};
35 35
36 36
37static const char spaces[] = " "; /* 16 of them */
38static blist_flags_t scsi_default_dev_flags; 37static blist_flags_t scsi_default_dev_flags;
39static LIST_HEAD(scsi_dev_info_list); 38static LIST_HEAD(scsi_dev_info_list);
40static char scsi_dev_flags[256]; 39static char scsi_dev_flags[256];
@@ -298,20 +297,13 @@ static void scsi_strcpy_devinfo(char *name, char *to, size_t to_length,
298 size_t from_length; 297 size_t from_length;
299 298
300 from_length = strlen(from); 299 from_length = strlen(from);
301 strncpy(to, from, min(to_length, from_length)); 300 /* This zero-pads the destination */
302 if (from_length < to_length) { 301 strncpy(to, from, to_length);
303 if (compatible) { 302 if (from_length < to_length && !compatible) {
304 /* 303 /*
305 * NUL terminate the string if it is short. 304 * space pad the string if it is short.
306 */ 305 */
307 to[from_length] = '\0'; 306 memset(&to[from_length], ' ', to_length - from_length);
308 } else {
309 /*
310 * space pad the string if it is short.
311 */
312 strncpy(&to[from_length], spaces,
313 to_length - from_length);
314 }
315 } 307 }
316 if (from_length > to_length) 308 if (from_length > to_length)
317 printk(KERN_WARNING "%s: %s string '%s' is too long\n", 309 printk(KERN_WARNING "%s: %s string '%s' is too long\n",
@@ -382,10 +374,8 @@ int scsi_dev_info_list_add_keyed(int compatible, char *vendor, char *model,
382 model, compatible); 374 model, compatible);
383 375
384 if (strflags) 376 if (strflags)
385 devinfo->flags = simple_strtoul(strflags, NULL, 0); 377 flags = (__force blist_flags_t)simple_strtoul(strflags, NULL, 0);
386 else 378 devinfo->flags = flags;
387 devinfo->flags = flags;
388
389 devinfo->compatible = compatible; 379 devinfo->compatible = compatible;
390 380
391 if (compatible) 381 if (compatible)
@@ -458,7 +448,8 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
458 /* 448 /*
459 * vendor strings must be an exact match 449 * vendor strings must be an exact match
460 */ 450 */
461 if (vmax != strlen(devinfo->vendor) || 451 if (vmax != strnlen(devinfo->vendor,
452 sizeof(devinfo->vendor)) ||
462 memcmp(devinfo->vendor, vskip, vmax)) 453 memcmp(devinfo->vendor, vskip, vmax))
463 continue; 454 continue;
464 455
@@ -466,7 +457,7 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
466 * @model specifies the full string, and 457 * @model specifies the full string, and
467 * must be larger or equal to devinfo->model 458 * must be larger or equal to devinfo->model
468 */ 459 */
469 mlen = strlen(devinfo->model); 460 mlen = strnlen(devinfo->model, sizeof(devinfo->model));
470 if (mmax < mlen || memcmp(devinfo->model, mskip, mlen)) 461 if (mmax < mlen || memcmp(devinfo->model, mskip, mlen))
471 continue; 462 continue;
472 return devinfo; 463 return devinfo;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 00742c50cd44..d9ca1dfab154 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1967,6 +1967,8 @@ static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx)
1967out_put_device: 1967out_put_device:
1968 put_device(&sdev->sdev_gendev); 1968 put_device(&sdev->sdev_gendev);
1969out: 1969out:
1970 if (atomic_read(&sdev->device_busy) == 0 && !scsi_device_blocked(sdev))
1971 blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
1970 return false; 1972 return false;
1971} 1973}
1972 1974
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index be5e919db0e8..0880d975eed3 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -770,7 +770,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
770 * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized 770 * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
771 **/ 771 **/
772static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, 772static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
773 int *bflags, int async) 773 blist_flags_t *bflags, int async)
774{ 774{
775 int ret; 775 int ret;
776 776
@@ -1049,14 +1049,15 @@ static unsigned char *scsi_inq_str(unsigned char *buf, unsigned char *inq,
1049 * - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized 1049 * - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
1050 **/ 1050 **/
1051static int scsi_probe_and_add_lun(struct scsi_target *starget, 1051static int scsi_probe_and_add_lun(struct scsi_target *starget,
1052 u64 lun, int *bflagsp, 1052 u64 lun, blist_flags_t *bflagsp,
1053 struct scsi_device **sdevp, 1053 struct scsi_device **sdevp,
1054 enum scsi_scan_mode rescan, 1054 enum scsi_scan_mode rescan,
1055 void *hostdata) 1055 void *hostdata)
1056{ 1056{
1057 struct scsi_device *sdev; 1057 struct scsi_device *sdev;
1058 unsigned char *result; 1058 unsigned char *result;
1059 int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256; 1059 blist_flags_t bflags;
1060 int res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
1060 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); 1061 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
1061 1062
1062 /* 1063 /*
@@ -1201,7 +1202,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
1201 * Modifies sdevscan->lun. 1202 * Modifies sdevscan->lun.
1202 **/ 1203 **/
1203static void scsi_sequential_lun_scan(struct scsi_target *starget, 1204static void scsi_sequential_lun_scan(struct scsi_target *starget,
1204 int bflags, int scsi_level, 1205 blist_flags_t bflags, int scsi_level,
1205 enum scsi_scan_mode rescan) 1206 enum scsi_scan_mode rescan)
1206{ 1207{
1207 uint max_dev_lun; 1208 uint max_dev_lun;
@@ -1292,7 +1293,7 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget,
1292 * 0: scan completed (or no memory, so further scanning is futile) 1293 * 0: scan completed (or no memory, so further scanning is futile)
1293 * 1: could not scan with REPORT LUN 1294 * 1: could not scan with REPORT LUN
1294 **/ 1295 **/
1295static int scsi_report_lun_scan(struct scsi_target *starget, int bflags, 1296static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflags,
1296 enum scsi_scan_mode rescan) 1297 enum scsi_scan_mode rescan)
1297{ 1298{
1298 unsigned char scsi_cmd[MAX_COMMAND_SIZE]; 1299 unsigned char scsi_cmd[MAX_COMMAND_SIZE];
@@ -1538,7 +1539,7 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
1538 unsigned int id, u64 lun, enum scsi_scan_mode rescan) 1539 unsigned int id, u64 lun, enum scsi_scan_mode rescan)
1539{ 1540{
1540 struct Scsi_Host *shost = dev_to_shost(parent); 1541 struct Scsi_Host *shost = dev_to_shost(parent);
1541 int bflags = 0; 1542 blist_flags_t bflags = 0;
1542 int res; 1543 int res;
1543 struct scsi_target *starget; 1544 struct scsi_target *starget;
1544 1545
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 50e7d7e4a861..26ce17178401 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -967,7 +967,8 @@ sdev_show_wwid(struct device *dev, struct device_attribute *attr,
967} 967}
968static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL); 968static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL);
969 969
970#define BLIST_FLAG_NAME(name) [ilog2(BLIST_##name)] = #name 970#define BLIST_FLAG_NAME(name) \
971 [ilog2((__force unsigned int)BLIST_##name)] = #name
971static const char *const sdev_bflags_name[] = { 972static const char *const sdev_bflags_name[] = {
972#include "scsi_devinfo_tbl.c" 973#include "scsi_devinfo_tbl.c"
973}; 974};
@@ -984,7 +985,7 @@ sdev_show_blacklist(struct device *dev, struct device_attribute *attr,
984 for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) { 985 for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) {
985 const char *name = NULL; 986 const char *name = NULL;
986 987
987 if (!(sdev->sdev_bflags & BIT(i))) 988 if (!(sdev->sdev_bflags & (__force blist_flags_t)BIT(i)))
988 continue; 989 continue;
989 if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i]) 990 if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i])
990 name = sdev_bflags_name[i]; 991 name = sdev_bflags_name[i];
@@ -1414,7 +1415,10 @@ static void __scsi_remove_target(struct scsi_target *starget)
1414 * check. 1415 * check.
1415 */ 1416 */
1416 if (sdev->channel != starget->channel || 1417 if (sdev->channel != starget->channel ||
1417 sdev->id != starget->id || 1418 sdev->id != starget->id)
1419 continue;
1420 if (sdev->sdev_state == SDEV_DEL ||
1421 sdev->sdev_state == SDEV_CANCEL ||
1418 !get_device(&sdev->sdev_gendev)) 1422 !get_device(&sdev->sdev_gendev))
1419 continue; 1423 continue;
1420 spin_unlock_irqrestore(shost->host_lock, flags); 1424 spin_unlock_irqrestore(shost->host_lock, flags);
diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index d0219e36080c..10ebb213ddb3 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -50,14 +50,14 @@
50 50
51/* Our blacklist flags */ 51/* Our blacklist flags */
52enum { 52enum {
53 SPI_BLIST_NOIUS = 0x1, 53 SPI_BLIST_NOIUS = (__force blist_flags_t)0x1,
54}; 54};
55 55
56/* blacklist table, modelled on scsi_devinfo.c */ 56/* blacklist table, modelled on scsi_devinfo.c */
57static struct { 57static struct {
58 char *vendor; 58 char *vendor;
59 char *model; 59 char *model;
60 unsigned flags; 60 blist_flags_t flags;
61} spi_static_device_list[] __initdata = { 61} spi_static_device_list[] __initdata = {
62 {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS }, 62 {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS },
63 {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS }, 63 {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS },
@@ -221,9 +221,11 @@ static int spi_device_configure(struct transport_container *tc,
221{ 221{
222 struct scsi_device *sdev = to_scsi_device(dev); 222 struct scsi_device *sdev = to_scsi_device(dev);
223 struct scsi_target *starget = sdev->sdev_target; 223 struct scsi_target *starget = sdev->sdev_target;
224 unsigned bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8], 224 blist_flags_t bflags;
225 &sdev->inquiry[16], 225
226 SCSI_DEVINFO_SPI); 226 bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
227 &sdev->inquiry[16],
228 SCSI_DEVINFO_SPI);
227 229
228 /* Populate the target capability fields with the values 230 /* Populate the target capability fields with the values
229 * gleaned from the device inquiry */ 231 * gleaned from the device inquiry */
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 24fe68522716..a028ab3322a9 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1312,6 +1312,7 @@ static int sd_init_command(struct scsi_cmnd *cmd)
1312static void sd_uninit_command(struct scsi_cmnd *SCpnt) 1312static void sd_uninit_command(struct scsi_cmnd *SCpnt)
1313{ 1313{
1314 struct request *rq = SCpnt->request; 1314 struct request *rq = SCpnt->request;
1315 u8 *cmnd;
1315 1316
1316 if (SCpnt->flags & SCMD_ZONE_WRITE_LOCK) 1317 if (SCpnt->flags & SCMD_ZONE_WRITE_LOCK)
1317 sd_zbc_write_unlock_zone(SCpnt); 1318 sd_zbc_write_unlock_zone(SCpnt);
@@ -1320,9 +1321,10 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
1320 __free_page(rq->special_vec.bv_page); 1321 __free_page(rq->special_vec.bv_page);
1321 1322
1322 if (SCpnt->cmnd != scsi_req(rq)->cmd) { 1323 if (SCpnt->cmnd != scsi_req(rq)->cmd) {
1323 mempool_free(SCpnt->cmnd, sd_cdb_pool); 1324 cmnd = SCpnt->cmnd;
1324 SCpnt->cmnd = NULL; 1325 SCpnt->cmnd = NULL;
1325 SCpnt->cmd_len = 0; 1326 SCpnt->cmd_len = 0;
1327 mempool_free(cmnd, sd_cdb_pool);
1326 } 1328 }
1327} 1329}
1328 1330
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 1b06cf0375dc..3b3d1d050cac 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -953,10 +953,11 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
953 case TEST_UNIT_READY: 953 case TEST_UNIT_READY:
954 break; 954 break;
955 default: 955 default:
956 set_host_byte(scmnd, DID_TARGET_FAILURE); 956 set_host_byte(scmnd, DID_ERROR);
957 } 957 }
958 break; 958 break;
959 case SRB_STATUS_INVALID_LUN: 959 case SRB_STATUS_INVALID_LUN:
960 set_host_byte(scmnd, DID_NO_CONNECT);
960 do_work = true; 961 do_work = true;
961 process_err_fn = storvsc_remove_lun; 962 process_err_fn = storvsc_remove_lun;
962 break; 963 break;
diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c
index 77fe55ce790c..d65345312527 100644
--- a/drivers/spi/spi-armada-3700.c
+++ b/drivers/spi/spi-armada-3700.c
@@ -79,6 +79,7 @@
79#define A3700_SPI_BYTE_LEN BIT(5) 79#define A3700_SPI_BYTE_LEN BIT(5)
80#define A3700_SPI_CLK_PRESCALE BIT(0) 80#define A3700_SPI_CLK_PRESCALE BIT(0)
81#define A3700_SPI_CLK_PRESCALE_MASK (0x1f) 81#define A3700_SPI_CLK_PRESCALE_MASK (0x1f)
82#define A3700_SPI_CLK_EVEN_OFFS (0x10)
82 83
83#define A3700_SPI_WFIFO_THRS_BIT 28 84#define A3700_SPI_WFIFO_THRS_BIT 28
84#define A3700_SPI_RFIFO_THRS_BIT 24 85#define A3700_SPI_RFIFO_THRS_BIT 24
@@ -220,6 +221,13 @@ static void a3700_spi_clock_set(struct a3700_spi *a3700_spi,
220 221
221 prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz); 222 prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz);
222 223
224 /* For prescaler values over 15, we can only set it by steps of 2.
225 * Starting from A3700_SPI_CLK_EVEN_OFFS, we set values from 0 up to
226 * 30. We only use this range from 16 to 30.
227 */
228 if (prescale > 15)
229 prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
230
223 val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG); 231 val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
224 val = val & ~A3700_SPI_CLK_PRESCALE_MASK; 232 val = val & ~A3700_SPI_CLK_PRESCALE_MASK;
225 233
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index f95da364c283..669470971023 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -1661,12 +1661,12 @@ static int atmel_spi_remove(struct platform_device *pdev)
1661 pm_runtime_get_sync(&pdev->dev); 1661 pm_runtime_get_sync(&pdev->dev);
1662 1662
1663 /* reset the hardware and block queue progress */ 1663 /* reset the hardware and block queue progress */
1664 spin_lock_irq(&as->lock);
1665 if (as->use_dma) { 1664 if (as->use_dma) {
1666 atmel_spi_stop_dma(master); 1665 atmel_spi_stop_dma(master);
1667 atmel_spi_release_dma(master); 1666 atmel_spi_release_dma(master);
1668 } 1667 }
1669 1668
1669 spin_lock_irq(&as->lock);
1670 spi_writel(as, CR, SPI_BIT(SWRST)); 1670 spi_writel(as, CR, SPI_BIT(SWRST));
1671 spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */ 1671 spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
1672 spi_readl(as, SR); 1672 spi_readl(as, SR);
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 2ce875764ca6..0835a8d88fb8 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -377,8 +377,8 @@ static int qspi_set_config_register(struct rspi_data *rspi, int access_size)
377 /* Sets SPCMD */ 377 /* Sets SPCMD */
378 rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0); 378 rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0);
379 379
380 /* Enables SPI function in master mode */ 380 /* Sets RSPI mode */
381 rspi_write8(rspi, SPCR_SPE | SPCR_MSTR, RSPI_SPCR); 381 rspi_write8(rspi, SPCR_MSTR, RSPI_SPCR);
382 382
383 return 0; 383 return 0;
384} 384}
diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index c5cd635c28f3..41410031f8e9 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -525,7 +525,7 @@ err_free_master:
525 525
526static int sun4i_spi_remove(struct platform_device *pdev) 526static int sun4i_spi_remove(struct platform_device *pdev)
527{ 527{
528 pm_runtime_disable(&pdev->dev); 528 pm_runtime_force_suspend(&pdev->dev);
529 529
530 return 0; 530 return 0;
531} 531}
diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c
index bc7100b93dfc..e0b9fe1d0e37 100644
--- a/drivers/spi/spi-xilinx.c
+++ b/drivers/spi/spi-xilinx.c
@@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
271 while (remaining_words) { 271 while (remaining_words) {
272 int n_words, tx_words, rx_words; 272 int n_words, tx_words, rx_words;
273 u32 sr; 273 u32 sr;
274 int stalled;
274 275
275 n_words = min(remaining_words, xspi->buffer_size); 276 n_words = min(remaining_words, xspi->buffer_size);
276 277
@@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
299 300
300 /* Read out all the data from the Rx FIFO */ 301 /* Read out all the data from the Rx FIFO */
301 rx_words = n_words; 302 rx_words = n_words;
303 stalled = 10;
302 while (rx_words) { 304 while (rx_words) {
305 if (rx_words == n_words && !(stalled--) &&
306 !(sr & XSPI_SR_TX_EMPTY_MASK) &&
307 (sr & XSPI_SR_RX_EMPTY_MASK)) {
308 dev_err(&spi->dev,
309 "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n");
310 xspi_init_hw(xspi);
311 return -EIO;
312 }
313
303 if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) { 314 if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) {
304 xilinx_spi_rx(xspi); 315 xilinx_spi_rx(xspi);
305 rx_words--; 316 rx_words--;
diff --git a/drivers/staging/android/ion/Kconfig b/drivers/staging/android/ion/Kconfig
index a517b2d29f1b..8f6494158d3d 100644
--- a/drivers/staging/android/ion/Kconfig
+++ b/drivers/staging/android/ion/Kconfig
@@ -37,7 +37,7 @@ config ION_CHUNK_HEAP
37 37
38config ION_CMA_HEAP 38config ION_CMA_HEAP
39 bool "Ion CMA heap support" 39 bool "Ion CMA heap support"
40 depends on ION && CMA 40 depends on ION && DMA_CMA
41 help 41 help
42 Choose this option to enable CMA heaps with Ion. This heap is backed 42 Choose this option to enable CMA heaps with Ion. This heap is backed
43 by the Contiguous Memory Allocator (CMA). If your system has these 43 by the Contiguous Memory Allocator (CMA). If your system has these
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index a7d9b0e98572..f480885e346b 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -346,7 +346,7 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
346 mutex_lock(&buffer->lock); 346 mutex_lock(&buffer->lock);
347 list_for_each_entry(a, &buffer->attachments, list) { 347 list_for_each_entry(a, &buffer->attachments, list) {
348 dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents, 348 dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents,
349 DMA_BIDIRECTIONAL); 349 direction);
350 } 350 }
351 mutex_unlock(&buffer->lock); 351 mutex_unlock(&buffer->lock);
352 352
@@ -368,7 +368,7 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
368 mutex_lock(&buffer->lock); 368 mutex_lock(&buffer->lock);
369 list_for_each_entry(a, &buffer->attachments, list) { 369 list_for_each_entry(a, &buffer->attachments, list) {
370 dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents, 370 dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents,
371 DMA_BIDIRECTIONAL); 371 direction);
372 } 372 }
373 mutex_unlock(&buffer->lock); 373 mutex_unlock(&buffer->lock);
374 374
diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c
index dd5545d9990a..86196ffd2faf 100644
--- a/drivers/staging/android/ion/ion_cma_heap.c
+++ b/drivers/staging/android/ion/ion_cma_heap.c
@@ -39,9 +39,15 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
39 struct ion_cma_heap *cma_heap = to_cma_heap(heap); 39 struct ion_cma_heap *cma_heap = to_cma_heap(heap);
40 struct sg_table *table; 40 struct sg_table *table;
41 struct page *pages; 41 struct page *pages;
42 unsigned long size = PAGE_ALIGN(len);
43 unsigned long nr_pages = size >> PAGE_SHIFT;
44 unsigned long align = get_order(size);
42 int ret; 45 int ret;
43 46
44 pages = cma_alloc(cma_heap->cma, len, 0, GFP_KERNEL); 47 if (align > CONFIG_CMA_ALIGNMENT)
48 align = CONFIG_CMA_ALIGNMENT;
49
50 pages = cma_alloc(cma_heap->cma, nr_pages, align, GFP_KERNEL);
45 if (!pages) 51 if (!pages)
46 return -ENOMEM; 52 return -ENOMEM;
47 53
@@ -53,7 +59,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
53 if (ret) 59 if (ret)
54 goto free_mem; 60 goto free_mem;
55 61
56 sg_set_page(table->sgl, pages, len, 0); 62 sg_set_page(table->sgl, pages, size, 0);
57 63
58 buffer->priv_virt = pages; 64 buffer->priv_virt = pages;
59 buffer->sg_table = table; 65 buffer->sg_table = table;
@@ -62,7 +68,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
62free_mem: 68free_mem:
63 kfree(table); 69 kfree(table);
64err: 70err:
65 cma_release(cma_heap->cma, pages, buffer->size); 71 cma_release(cma_heap->cma, pages, nr_pages);
66 return -ENOMEM; 72 return -ENOMEM;
67} 73}
68 74
@@ -70,9 +76,10 @@ static void ion_cma_free(struct ion_buffer *buffer)
70{ 76{
71 struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap); 77 struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap);
72 struct page *pages = buffer->priv_virt; 78 struct page *pages = buffer->priv_virt;
79 unsigned long nr_pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT;
73 80
74 /* release memory */ 81 /* release memory */
75 cma_release(cma_heap->cma, pages, buffer->size); 82 cma_release(cma_heap->cma, pages, nr_pages);
76 /* release sg table */ 83 /* release sg table */
77 sg_free_table(buffer->sg_table); 84 sg_free_table(buffer->sg_table);
78 kfree(buffer->sg_table); 85 kfree(buffer->sg_table);
diff --git a/drivers/staging/ccree/ssi_hash.c b/drivers/staging/ccree/ssi_hash.c
index 1799d3f26a9e..2035835b62dc 100644
--- a/drivers/staging/ccree/ssi_hash.c
+++ b/drivers/staging/ccree/ssi_hash.c
@@ -1769,7 +1769,7 @@ static int ssi_ahash_import(struct ahash_request *req, const void *in)
1769 struct device *dev = drvdata_to_dev(ctx->drvdata); 1769 struct device *dev = drvdata_to_dev(ctx->drvdata);
1770 struct ahash_req_ctx *state = ahash_request_ctx(req); 1770 struct ahash_req_ctx *state = ahash_request_ctx(req);
1771 u32 tmp; 1771 u32 tmp;
1772 int rc; 1772 int rc = 0;
1773 1773
1774 memcpy(&tmp, in, sizeof(u32)); 1774 memcpy(&tmp, in, sizeof(u32));
1775 if (tmp != CC_EXPORT_MAGIC) { 1775 if (tmp != CC_EXPORT_MAGIC) {
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
index 986c2a40d978..8267119ccc8e 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
@@ -487,21 +487,18 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
487 ksocknal_nid2peerlist(id.nid)); 487 ksocknal_nid2peerlist(id.nid));
488 } 488 }
489 489
490 route2 = NULL;
491 list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) { 490 list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) {
492 if (route2->ksnr_ipaddr == ipaddr) 491 if (route2->ksnr_ipaddr == ipaddr) {
493 break; 492 /* Route already exists, use the old one */
494 493 ksocknal_route_decref(route);
495 route2 = NULL; 494 route2->ksnr_share_count++;
496 } 495 goto out;
497 if (!route2) { 496 }
498 ksocknal_add_route_locked(peer, route);
499 route->ksnr_share_count++;
500 } else {
501 ksocknal_route_decref(route);
502 route2->ksnr_share_count++;
503 } 497 }
504 498 /* Route doesn't already exist, add the new one */
499 ksocknal_add_route_locked(peer, route);
500 route->ksnr_share_count++;
501out:
505 write_unlock_bh(&ksocknal_data.ksnd_global_lock); 502 write_unlock_bh(&ksocknal_data.ksnd_global_lock);
506 503
507 return 0; 504 return 0;
diff --git a/drivers/staging/pi433/rf69.c b/drivers/staging/pi433/rf69.c
index e69a2153c999..12c9df9cddde 100644
--- a/drivers/staging/pi433/rf69.c
+++ b/drivers/staging/pi433/rf69.c
@@ -102,7 +102,7 @@ enum modulation rf69_get_modulation(struct spi_device *spi)
102 102
103 currentValue = READ_REG(REG_DATAMODUL); 103 currentValue = READ_REG(REG_DATAMODUL);
104 104
105 switch (currentValue & MASK_DATAMODUL_MODULATION_TYPE >> 3) { // TODO improvement: change 3 to define 105 switch (currentValue & MASK_DATAMODUL_MODULATION_TYPE) {
106 case DATAMODUL_MODULATION_TYPE_OOK: return OOK; 106 case DATAMODUL_MODULATION_TYPE_OOK: return OOK;
107 case DATAMODUL_MODULATION_TYPE_FSK: return FSK; 107 case DATAMODUL_MODULATION_TYPE_FSK: return FSK;
108 default: return undefined; 108 default: return undefined;
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 7c69b4a9694d..0d99b242e82e 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
920 " %d i: %d bio: %p, allocating another" 920 " %d i: %d bio: %p, allocating another"
921 " bio\n", bio->bi_vcnt, i, bio); 921 " bio\n", bio->bi_vcnt, i, bio);
922 922
923 rc = blk_rq_append_bio(req, bio); 923 rc = blk_rq_append_bio(req, &bio);
924 if (rc) { 924 if (rc) {
925 pr_err("pSCSI: failed to append bio\n"); 925 pr_err("pSCSI: failed to append bio\n");
926 goto fail; 926 goto fail;
@@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
938 } 938 }
939 939
940 if (bio) { 940 if (bio) {
941 rc = blk_rq_append_bio(req, bio); 941 rc = blk_rq_append_bio(req, &bio);
942 if (rc) { 942 if (rc) {
943 pr_err("pSCSI: failed to append bio\n"); 943 pr_err("pSCSI: failed to append bio\n");
944 goto fail; 944 goto fail;
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
index 419a7a90bce0..f45bcbc63738 100644
--- a/drivers/thunderbolt/nhi.c
+++ b/drivers/thunderbolt/nhi.c
@@ -339,7 +339,7 @@ static void __ring_interrupt(struct tb_ring *ring)
339 return; 339 return;
340 340
341 if (ring->start_poll) { 341 if (ring->start_poll) {
342 __ring_interrupt_mask(ring, false); 342 __ring_interrupt_mask(ring, true);
343 ring->start_poll(ring->poll_data); 343 ring->start_poll(ring->poll_data);
344 } else { 344 } else {
345 schedule_work(&ring->work); 345 schedule_work(&ring->work);
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 427e0d5d8f13..539b49adb6af 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -1762,7 +1762,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
1762{ 1762{
1763 struct n_tty_data *ldata = tty->disc_data; 1763 struct n_tty_data *ldata = tty->disc_data;
1764 1764
1765 if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) { 1765 if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) {
1766 bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE); 1766 bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
1767 ldata->line_start = ldata->read_tail; 1767 ldata->line_start = ldata->read_tail;
1768 if (!L_ICANON(tty) || !read_cnt(ldata)) { 1768 if (!L_ICANON(tty) || !read_cnt(ldata)) {
@@ -2425,7 +2425,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
2425 return put_user(tty_chars_in_buffer(tty), (int __user *) arg); 2425 return put_user(tty_chars_in_buffer(tty), (int __user *) arg);
2426 case TIOCINQ: 2426 case TIOCINQ:
2427 down_write(&tty->termios_rwsem); 2427 down_write(&tty->termios_rwsem);
2428 if (L_ICANON(tty)) 2428 if (L_ICANON(tty) && !L_EXTPROC(tty))
2429 retval = inq_canon(ldata); 2429 retval = inq_canon(ldata);
2430 else 2430 else
2431 retval = read_cnt(ldata); 2431 retval = read_cnt(ldata);
diff --git a/drivers/usb/chipidea/ci_hdrc_msm.c b/drivers/usb/chipidea/ci_hdrc_msm.c
index 3593ce0ec641..880009987460 100644
--- a/drivers/usb/chipidea/ci_hdrc_msm.c
+++ b/drivers/usb/chipidea/ci_hdrc_msm.c
@@ -247,7 +247,7 @@ static int ci_hdrc_msm_probe(struct platform_device *pdev)
247 if (ret) 247 if (ret)
248 goto err_mux; 248 goto err_mux;
249 249
250 ulpi_node = of_find_node_by_name(of_node_get(pdev->dev.of_node), "ulpi"); 250 ulpi_node = of_get_child_by_name(pdev->dev.of_node, "ulpi");
251 if (ulpi_node) { 251 if (ulpi_node) {
252 phy_node = of_get_next_available_child(ulpi_node, NULL); 252 phy_node = of_get_next_available_child(ulpi_node, NULL);
253 ci->hsic = of_device_is_compatible(phy_node, "qcom,usb-hsic-phy"); 253 ci->hsic = of_device_is_compatible(phy_node, "qcom,usb-hsic-phy");
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 55b198ba629b..c821b4b9647e 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -555,6 +555,9 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
555 unsigned iad_num = 0; 555 unsigned iad_num = 0;
556 556
557 memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE); 557 memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE);
558 nintf = nintf_orig = config->desc.bNumInterfaces;
559 config->desc.bNumInterfaces = 0; // Adjusted later
560
558 if (config->desc.bDescriptorType != USB_DT_CONFIG || 561 if (config->desc.bDescriptorType != USB_DT_CONFIG ||
559 config->desc.bLength < USB_DT_CONFIG_SIZE || 562 config->desc.bLength < USB_DT_CONFIG_SIZE ||
560 config->desc.bLength > size) { 563 config->desc.bLength > size) {
@@ -568,7 +571,6 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
568 buffer += config->desc.bLength; 571 buffer += config->desc.bLength;
569 size -= config->desc.bLength; 572 size -= config->desc.bLength;
570 573
571 nintf = nintf_orig = config->desc.bNumInterfaces;
572 if (nintf > USB_MAXINTERFACES) { 574 if (nintf > USB_MAXINTERFACES) {
573 dev_warn(ddev, "config %d has too many interfaces: %d, " 575 dev_warn(ddev, "config %d has too many interfaces: %d, "
574 "using maximum allowed: %d\n", 576 "using maximum allowed: %d\n",
@@ -1005,7 +1007,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
1005 case USB_SSP_CAP_TYPE: 1007 case USB_SSP_CAP_TYPE:
1006 ssp_cap = (struct usb_ssp_cap_descriptor *)buffer; 1008 ssp_cap = (struct usb_ssp_cap_descriptor *)buffer;
1007 ssac = (le32_to_cpu(ssp_cap->bmAttributes) & 1009 ssac = (le32_to_cpu(ssp_cap->bmAttributes) &
1008 USB_SSP_SUBLINK_SPEED_ATTRIBS) + 1; 1010 USB_SSP_SUBLINK_SPEED_ATTRIBS);
1009 if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac)) 1011 if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac))
1010 dev->bos->ssp_cap = ssp_cap; 1012 dev->bos->ssp_cap = ssp_cap;
1011 break; 1013 break;
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index a10b346b9777..4024926c1d68 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -52,10 +52,11 @@ static const struct usb_device_id usb_quirk_list[] = {
52 /* Microsoft LifeCam-VX700 v2.0 */ 52 /* Microsoft LifeCam-VX700 v2.0 */
53 { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, 53 { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },
54 54
55 /* Logitech HD Pro Webcams C920, C920-C and C930e */ 55 /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
56 { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, 56 { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
57 { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, 57 { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
58 { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, 58 { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT },
59 { USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT },
59 60
60 /* Logitech ConferenceCam CC3000e */ 61 /* Logitech ConferenceCam CC3000e */
61 { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT }, 62 { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT },
@@ -149,6 +150,9 @@ static const struct usb_device_id usb_quirk_list[] = {
149 /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */ 150 /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */
150 { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM }, 151 { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM },
151 152
153 /* ELSA MicroLink 56K */
154 { USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME },
155
152 /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */ 156 /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */
153 { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM }, 157 { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM },
154 158
diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index f66c94130cac..31749c79045f 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -537,6 +537,7 @@ struct dwc2_core_params {
537 * 2 - Internal DMA 537 * 2 - Internal DMA
538 * @power_optimized Are power optimizations enabled? 538 * @power_optimized Are power optimizations enabled?
539 * @num_dev_ep Number of device endpoints available 539 * @num_dev_ep Number of device endpoints available
540 * @num_dev_in_eps Number of device IN endpoints available
540 * @num_dev_perio_in_ep Number of device periodic IN endpoints 541 * @num_dev_perio_in_ep Number of device periodic IN endpoints
541 * available 542 * available
542 * @dev_token_q_depth Device Mode IN Token Sequence Learning Queue 543 * @dev_token_q_depth Device Mode IN Token Sequence Learning Queue
@@ -565,6 +566,7 @@ struct dwc2_core_params {
565 * 2 - 8 or 16 bits 566 * 2 - 8 or 16 bits
566 * @snpsid: Value from SNPSID register 567 * @snpsid: Value from SNPSID register
567 * @dev_ep_dirs: Direction of device endpoints (GHWCFG1) 568 * @dev_ep_dirs: Direction of device endpoints (GHWCFG1)
569 * @g_tx_fifo_size[] Power-on values of TxFIFO sizes
568 */ 570 */
569struct dwc2_hw_params { 571struct dwc2_hw_params {
570 unsigned op_mode:3; 572 unsigned op_mode:3;
@@ -586,12 +588,14 @@ struct dwc2_hw_params {
586 unsigned fs_phy_type:2; 588 unsigned fs_phy_type:2;
587 unsigned i2c_enable:1; 589 unsigned i2c_enable:1;
588 unsigned num_dev_ep:4; 590 unsigned num_dev_ep:4;
591 unsigned num_dev_in_eps : 4;
589 unsigned num_dev_perio_in_ep:4; 592 unsigned num_dev_perio_in_ep:4;
590 unsigned total_fifo_size:16; 593 unsigned total_fifo_size:16;
591 unsigned power_optimized:1; 594 unsigned power_optimized:1;
592 unsigned utmi_phy_data_width:2; 595 unsigned utmi_phy_data_width:2;
593 u32 snpsid; 596 u32 snpsid;
594 u32 dev_ep_dirs; 597 u32 dev_ep_dirs;
598 u32 g_tx_fifo_size[MAX_EPS_CHANNELS];
595}; 599};
596 600
597/* Size of control and EP0 buffers */ 601/* Size of control and EP0 buffers */
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 88529d092503..e4c3ce0de5de 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -195,55 +195,18 @@ int dwc2_hsotg_tx_fifo_count(struct dwc2_hsotg *hsotg)
195{ 195{
196 if (hsotg->hw_params.en_multiple_tx_fifo) 196 if (hsotg->hw_params.en_multiple_tx_fifo)
197 /* In dedicated FIFO mode we need count of IN EPs */ 197 /* In dedicated FIFO mode we need count of IN EPs */
198 return (dwc2_readl(hsotg->regs + GHWCFG4) & 198 return hsotg->hw_params.num_dev_in_eps;
199 GHWCFG4_NUM_IN_EPS_MASK) >> GHWCFG4_NUM_IN_EPS_SHIFT;
200 else 199 else
201 /* In shared FIFO mode we need count of Periodic IN EPs */ 200 /* In shared FIFO mode we need count of Periodic IN EPs */
202 return hsotg->hw_params.num_dev_perio_in_ep; 201 return hsotg->hw_params.num_dev_perio_in_ep;
203} 202}
204 203
205/** 204/**
206 * dwc2_hsotg_ep_info_size - return Endpoint Info Control block size in DWORDs
207 */
208static int dwc2_hsotg_ep_info_size(struct dwc2_hsotg *hsotg)
209{
210 int val = 0;
211 int i;
212 u32 ep_dirs;
213
214 /*
215 * Don't need additional space for ep info control registers in
216 * slave mode.
217 */
218 if (!using_dma(hsotg)) {
219 dev_dbg(hsotg->dev, "Buffer DMA ep info size 0\n");
220 return 0;
221 }
222
223 /*
224 * Buffer DMA mode - 1 location per endpoit
225 * Descriptor DMA mode - 4 locations per endpoint
226 */
227 ep_dirs = hsotg->hw_params.dev_ep_dirs;
228
229 for (i = 0; i <= hsotg->hw_params.num_dev_ep; i++) {
230 val += ep_dirs & 3 ? 1 : 2;
231 ep_dirs >>= 2;
232 }
233
234 if (using_desc_dma(hsotg))
235 val = val * 4;
236
237 return val;
238}
239
240/**
241 * dwc2_hsotg_tx_fifo_total_depth - return total FIFO depth available for 205 * dwc2_hsotg_tx_fifo_total_depth - return total FIFO depth available for
242 * device mode TX FIFOs 206 * device mode TX FIFOs
243 */ 207 */
244int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg) 208int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg)
245{ 209{
246 int ep_info_size;
247 int addr; 210 int addr;
248 int tx_addr_max; 211 int tx_addr_max;
249 u32 np_tx_fifo_size; 212 u32 np_tx_fifo_size;
@@ -252,8 +215,7 @@ int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg)
252 hsotg->params.g_np_tx_fifo_size); 215 hsotg->params.g_np_tx_fifo_size);
253 216
254 /* Get Endpoint Info Control block size in DWORDs. */ 217 /* Get Endpoint Info Control block size in DWORDs. */
255 ep_info_size = dwc2_hsotg_ep_info_size(hsotg); 218 tx_addr_max = hsotg->hw_params.total_fifo_size;
256 tx_addr_max = hsotg->hw_params.total_fifo_size - ep_info_size;
257 219
258 addr = hsotg->params.g_rx_fifo_size + np_tx_fifo_size; 220 addr = hsotg->params.g_rx_fifo_size + np_tx_fifo_size;
259 if (tx_addr_max <= addr) 221 if (tx_addr_max <= addr)
diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c
index ef73af6e03a9..03fd20f0b496 100644
--- a/drivers/usb/dwc2/params.c
+++ b/drivers/usb/dwc2/params.c
@@ -484,8 +484,7 @@ static void dwc2_check_param_tx_fifo_sizes(struct dwc2_hsotg *hsotg)
484 } 484 }
485 485
486 for (fifo = 1; fifo <= fifo_count; fifo++) { 486 for (fifo = 1; fifo <= fifo_count; fifo++) {
487 dptxfszn = (dwc2_readl(hsotg->regs + DPTXFSIZN(fifo)) & 487 dptxfszn = hsotg->hw_params.g_tx_fifo_size[fifo];
488 FIFOSIZE_DEPTH_MASK) >> FIFOSIZE_DEPTH_SHIFT;
489 488
490 if (hsotg->params.g_tx_fifo_size[fifo] < min || 489 if (hsotg->params.g_tx_fifo_size[fifo] < min ||
491 hsotg->params.g_tx_fifo_size[fifo] > dptxfszn) { 490 hsotg->params.g_tx_fifo_size[fifo] > dptxfszn) {
@@ -609,6 +608,7 @@ static void dwc2_get_dev_hwparams(struct dwc2_hsotg *hsotg)
609 struct dwc2_hw_params *hw = &hsotg->hw_params; 608 struct dwc2_hw_params *hw = &hsotg->hw_params;
610 bool forced; 609 bool forced;
611 u32 gnptxfsiz; 610 u32 gnptxfsiz;
611 int fifo, fifo_count;
612 612
613 if (hsotg->dr_mode == USB_DR_MODE_HOST) 613 if (hsotg->dr_mode == USB_DR_MODE_HOST)
614 return; 614 return;
@@ -617,6 +617,14 @@ static void dwc2_get_dev_hwparams(struct dwc2_hsotg *hsotg)
617 617
618 gnptxfsiz = dwc2_readl(hsotg->regs + GNPTXFSIZ); 618 gnptxfsiz = dwc2_readl(hsotg->regs + GNPTXFSIZ);
619 619
620 fifo_count = dwc2_hsotg_tx_fifo_count(hsotg);
621
622 for (fifo = 1; fifo <= fifo_count; fifo++) {
623 hw->g_tx_fifo_size[fifo] =
624 (dwc2_readl(hsotg->regs + DPTXFSIZN(fifo)) &
625 FIFOSIZE_DEPTH_MASK) >> FIFOSIZE_DEPTH_SHIFT;
626 }
627
620 if (forced) 628 if (forced)
621 dwc2_clear_force_mode(hsotg); 629 dwc2_clear_force_mode(hsotg);
622 630
@@ -661,14 +669,6 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
661 hwcfg4 = dwc2_readl(hsotg->regs + GHWCFG4); 669 hwcfg4 = dwc2_readl(hsotg->regs + GHWCFG4);
662 grxfsiz = dwc2_readl(hsotg->regs + GRXFSIZ); 670 grxfsiz = dwc2_readl(hsotg->regs + GRXFSIZ);
663 671
664 /*
665 * Host specific hardware parameters. Reading these parameters
666 * requires the controller to be in host mode. The mode will
667 * be forced, if necessary, to read these values.
668 */
669 dwc2_get_host_hwparams(hsotg);
670 dwc2_get_dev_hwparams(hsotg);
671
672 /* hwcfg1 */ 672 /* hwcfg1 */
673 hw->dev_ep_dirs = hwcfg1; 673 hw->dev_ep_dirs = hwcfg1;
674 674
@@ -711,6 +711,8 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
711 hw->en_multiple_tx_fifo = !!(hwcfg4 & GHWCFG4_DED_FIFO_EN); 711 hw->en_multiple_tx_fifo = !!(hwcfg4 & GHWCFG4_DED_FIFO_EN);
712 hw->num_dev_perio_in_ep = (hwcfg4 & GHWCFG4_NUM_DEV_PERIO_IN_EP_MASK) >> 712 hw->num_dev_perio_in_ep = (hwcfg4 & GHWCFG4_NUM_DEV_PERIO_IN_EP_MASK) >>
713 GHWCFG4_NUM_DEV_PERIO_IN_EP_SHIFT; 713 GHWCFG4_NUM_DEV_PERIO_IN_EP_SHIFT;
714 hw->num_dev_in_eps = (hwcfg4 & GHWCFG4_NUM_IN_EPS_MASK) >>
715 GHWCFG4_NUM_IN_EPS_SHIFT;
714 hw->dma_desc_enable = !!(hwcfg4 & GHWCFG4_DESC_DMA); 716 hw->dma_desc_enable = !!(hwcfg4 & GHWCFG4_DESC_DMA);
715 hw->power_optimized = !!(hwcfg4 & GHWCFG4_POWER_OPTIMIZ); 717 hw->power_optimized = !!(hwcfg4 & GHWCFG4_POWER_OPTIMIZ);
716 hw->utmi_phy_data_width = (hwcfg4 & GHWCFG4_UTMI_PHY_DATA_WIDTH_MASK) >> 718 hw->utmi_phy_data_width = (hwcfg4 & GHWCFG4_UTMI_PHY_DATA_WIDTH_MASK) >>
@@ -719,6 +721,13 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
719 /* fifo sizes */ 721 /* fifo sizes */
720 hw->rx_fifo_size = (grxfsiz & GRXFSIZ_DEPTH_MASK) >> 722 hw->rx_fifo_size = (grxfsiz & GRXFSIZ_DEPTH_MASK) >>
721 GRXFSIZ_DEPTH_SHIFT; 723 GRXFSIZ_DEPTH_SHIFT;
724 /*
725 * Host specific hardware parameters. Reading these parameters
726 * requires the controller to be in host mode. The mode will
727 * be forced, if necessary, to read these values.
728 */
729 dwc2_get_host_hwparams(hsotg);
730 dwc2_get_dev_hwparams(hsotg);
722 731
723 return 0; 732 return 0;
724} 733}
diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c
index c4a4d7bd2766..7ae0eefc7cc7 100644
--- a/drivers/usb/dwc3/dwc3-of-simple.c
+++ b/drivers/usb/dwc3/dwc3-of-simple.c
@@ -51,8 +51,10 @@ static int dwc3_of_simple_clk_init(struct dwc3_of_simple *simple, int count)
51 51
52 clk = of_clk_get(np, i); 52 clk = of_clk_get(np, i);
53 if (IS_ERR(clk)) { 53 if (IS_ERR(clk)) {
54 while (--i >= 0) 54 while (--i >= 0) {
55 clk_disable_unprepare(simple->clks[i]);
55 clk_put(simple->clks[i]); 56 clk_put(simple->clks[i]);
57 }
56 return PTR_ERR(clk); 58 return PTR_ERR(clk);
57 } 59 }
58 60
@@ -203,6 +205,7 @@ static struct platform_driver dwc3_of_simple_driver = {
203 .driver = { 205 .driver = {
204 .name = "dwc3-of-simple", 206 .name = "dwc3-of-simple",
205 .of_match_table = of_dwc3_simple_match, 207 .of_match_table = of_dwc3_simple_match,
208 .pm = &dwc3_of_simple_dev_pm_ops,
206 }, 209 },
207}; 210};
208 211
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 981fd986cf82..639dd1b163a0 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -259,7 +259,7 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned cmd,
259{ 259{
260 const struct usb_endpoint_descriptor *desc = dep->endpoint.desc; 260 const struct usb_endpoint_descriptor *desc = dep->endpoint.desc;
261 struct dwc3 *dwc = dep->dwc; 261 struct dwc3 *dwc = dep->dwc;
262 u32 timeout = 500; 262 u32 timeout = 1000;
263 u32 reg; 263 u32 reg;
264 264
265 int cmd_status = 0; 265 int cmd_status = 0;
@@ -912,7 +912,7 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
912 */ 912 */
913 if (speed == USB_SPEED_HIGH) { 913 if (speed == USB_SPEED_HIGH) {
914 struct usb_ep *ep = &dep->endpoint; 914 struct usb_ep *ep = &dep->endpoint;
915 unsigned int mult = ep->mult - 1; 915 unsigned int mult = 2;
916 unsigned int maxp = usb_endpoint_maxp(ep->desc); 916 unsigned int maxp = usb_endpoint_maxp(ep->desc);
917 917
918 if (length <= (2 * maxp)) 918 if (length <= (2 * maxp))
diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 0a19a76645ad..31cce7805eb2 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -508,8 +508,8 @@ choice
508 controller, and the relevant drivers for each function declared 508 controller, and the relevant drivers for each function declared
509 by the device. 509 by the device.
510 510
511endchoice
512
513source "drivers/usb/gadget/legacy/Kconfig" 511source "drivers/usb/gadget/legacy/Kconfig"
514 512
513endchoice
514
515endif # USB_GADGET 515endif # USB_GADGET
diff --git a/drivers/usb/gadget/legacy/Kconfig b/drivers/usb/gadget/legacy/Kconfig
index 9570bbeced4f..784bf86dad4f 100644
--- a/drivers/usb/gadget/legacy/Kconfig
+++ b/drivers/usb/gadget/legacy/Kconfig
@@ -13,14 +13,6 @@
13# both kinds of controller can also support "USB On-the-Go" (CONFIG_USB_OTG). 13# both kinds of controller can also support "USB On-the-Go" (CONFIG_USB_OTG).
14# 14#
15 15
16menuconfig USB_GADGET_LEGACY
17 bool "Legacy USB Gadget Support"
18 help
19 Legacy USB gadgets are USB gadgets that do not use the USB gadget
20 configfs interface.
21
22if USB_GADGET_LEGACY
23
24config USB_ZERO 16config USB_ZERO
25 tristate "Gadget Zero (DEVELOPMENT)" 17 tristate "Gadget Zero (DEVELOPMENT)"
26 select USB_LIBCOMPOSITE 18 select USB_LIBCOMPOSITE
@@ -487,7 +479,7 @@ endif
487# or video class gadget drivers), or specific hardware, here. 479# or video class gadget drivers), or specific hardware, here.
488config USB_G_WEBCAM 480config USB_G_WEBCAM
489 tristate "USB Webcam Gadget" 481 tristate "USB Webcam Gadget"
490 depends on VIDEO_DEV 482 depends on VIDEO_V4L2
491 select USB_LIBCOMPOSITE 483 select USB_LIBCOMPOSITE
492 select VIDEOBUF2_VMALLOC 484 select VIDEOBUF2_VMALLOC
493 select USB_F_UVC 485 select USB_F_UVC
@@ -498,5 +490,3 @@ config USB_G_WEBCAM
498 490
499 Say "y" to link the driver statically, or "m" to build a 491 Say "y" to link the driver statically, or "m" to build a
500 dynamically linked module called "g_webcam". 492 dynamically linked module called "g_webcam".
501
502endif
diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c
index 4f7895dbcf88..e26e685d8a57 100644
--- a/drivers/usb/host/xhci-debugfs.c
+++ b/drivers/usb/host/xhci-debugfs.c
@@ -162,7 +162,7 @@ static void xhci_debugfs_extcap_regset(struct xhci_hcd *xhci, int cap_id,
162static int xhci_ring_enqueue_show(struct seq_file *s, void *unused) 162static int xhci_ring_enqueue_show(struct seq_file *s, void *unused)
163{ 163{
164 dma_addr_t dma; 164 dma_addr_t dma;
165 struct xhci_ring *ring = s->private; 165 struct xhci_ring *ring = *(struct xhci_ring **)s->private;
166 166
167 dma = xhci_trb_virt_to_dma(ring->enq_seg, ring->enqueue); 167 dma = xhci_trb_virt_to_dma(ring->enq_seg, ring->enqueue);
168 seq_printf(s, "%pad\n", &dma); 168 seq_printf(s, "%pad\n", &dma);
@@ -173,7 +173,7 @@ static int xhci_ring_enqueue_show(struct seq_file *s, void *unused)
173static int xhci_ring_dequeue_show(struct seq_file *s, void *unused) 173static int xhci_ring_dequeue_show(struct seq_file *s, void *unused)
174{ 174{
175 dma_addr_t dma; 175 dma_addr_t dma;
176 struct xhci_ring *ring = s->private; 176 struct xhci_ring *ring = *(struct xhci_ring **)s->private;
177 177
178 dma = xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue); 178 dma = xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue);
179 seq_printf(s, "%pad\n", &dma); 179 seq_printf(s, "%pad\n", &dma);
@@ -183,7 +183,7 @@ static int xhci_ring_dequeue_show(struct seq_file *s, void *unused)
183 183
184static int xhci_ring_cycle_show(struct seq_file *s, void *unused) 184static int xhci_ring_cycle_show(struct seq_file *s, void *unused)
185{ 185{
186 struct xhci_ring *ring = s->private; 186 struct xhci_ring *ring = *(struct xhci_ring **)s->private;
187 187
188 seq_printf(s, "%d\n", ring->cycle_state); 188 seq_printf(s, "%d\n", ring->cycle_state);
189 189
@@ -346,7 +346,7 @@ static void xhci_debugfs_create_files(struct xhci_hcd *xhci,
346} 346}
347 347
348static struct dentry *xhci_debugfs_create_ring_dir(struct xhci_hcd *xhci, 348static struct dentry *xhci_debugfs_create_ring_dir(struct xhci_hcd *xhci,
349 struct xhci_ring *ring, 349 struct xhci_ring **ring,
350 const char *name, 350 const char *name,
351 struct dentry *parent) 351 struct dentry *parent)
352{ 352{
@@ -387,7 +387,7 @@ void xhci_debugfs_create_endpoint(struct xhci_hcd *xhci,
387 387
388 snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index); 388 snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index);
389 epriv->root = xhci_debugfs_create_ring_dir(xhci, 389 epriv->root = xhci_debugfs_create_ring_dir(xhci,
390 dev->eps[ep_index].new_ring, 390 &dev->eps[ep_index].new_ring,
391 epriv->name, 391 epriv->name,
392 spriv->root); 392 spriv->root);
393 spriv->eps[ep_index] = epriv; 393 spriv->eps[ep_index] = epriv;
@@ -423,7 +423,7 @@ void xhci_debugfs_create_slot(struct xhci_hcd *xhci, int slot_id)
423 priv->dev = dev; 423 priv->dev = dev;
424 dev->debugfs_private = priv; 424 dev->debugfs_private = priv;
425 425
426 xhci_debugfs_create_ring_dir(xhci, dev->eps[0].ring, 426 xhci_debugfs_create_ring_dir(xhci, &dev->eps[0].ring,
427 "ep00", priv->root); 427 "ep00", priv->root);
428 428
429 xhci_debugfs_create_context_files(xhci, priv->root, slot_id); 429 xhci_debugfs_create_context_files(xhci, priv->root, slot_id);
@@ -488,11 +488,11 @@ void xhci_debugfs_init(struct xhci_hcd *xhci)
488 ARRAY_SIZE(xhci_extcap_dbc), 488 ARRAY_SIZE(xhci_extcap_dbc),
489 "reg-ext-dbc"); 489 "reg-ext-dbc");
490 490
491 xhci_debugfs_create_ring_dir(xhci, xhci->cmd_ring, 491 xhci_debugfs_create_ring_dir(xhci, &xhci->cmd_ring,
492 "command-ring", 492 "command-ring",
493 xhci->debugfs_root); 493 xhci->debugfs_root);
494 494
495 xhci_debugfs_create_ring_dir(xhci, xhci->event_ring, 495 xhci_debugfs_create_ring_dir(xhci, &xhci->event_ring,
496 "event-ring", 496 "event-ring",
497 xhci->debugfs_root); 497 xhci->debugfs_root);
498 498
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 15f7d422885f..3a29b32a3bd0 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -971,10 +971,9 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
971 return 0; 971 return 0;
972 } 972 }
973 973
974 xhci->devs[slot_id] = kzalloc(sizeof(*xhci->devs[slot_id]), flags); 974 dev = kzalloc(sizeof(*dev), flags);
975 if (!xhci->devs[slot_id]) 975 if (!dev)
976 return 0; 976 return 0;
977 dev = xhci->devs[slot_id];
978 977
979 /* Allocate the (output) device context that will be used in the HC. */ 978 /* Allocate the (output) device context that will be used in the HC. */
980 dev->out_ctx = xhci_alloc_container_ctx(xhci, XHCI_CTX_TYPE_DEVICE, flags); 979 dev->out_ctx = xhci_alloc_container_ctx(xhci, XHCI_CTX_TYPE_DEVICE, flags);
@@ -1015,9 +1014,17 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
1015 1014
1016 trace_xhci_alloc_virt_device(dev); 1015 trace_xhci_alloc_virt_device(dev);
1017 1016
1017 xhci->devs[slot_id] = dev;
1018
1018 return 1; 1019 return 1;
1019fail: 1020fail:
1020 xhci_free_virt_device(xhci, slot_id); 1021
1022 if (dev->in_ctx)
1023 xhci_free_container_ctx(xhci, dev->in_ctx);
1024 if (dev->out_ctx)
1025 xhci_free_container_ctx(xhci, dev->out_ctx);
1026 kfree(dev);
1027
1021 return 0; 1028 return 0;
1022} 1029}
1023 1030
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 7ef1274ef7f7..1aad89b8aba0 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -178,6 +178,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
178 xhci->quirks |= XHCI_BROKEN_STREAMS; 178 xhci->quirks |= XHCI_BROKEN_STREAMS;
179 } 179 }
180 if (pdev->vendor == PCI_VENDOR_ID_RENESAS && 180 if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
181 pdev->device == 0x0014)
182 xhci->quirks |= XHCI_TRUST_TX_LENGTH;
183 if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
181 pdev->device == 0x0015) 184 pdev->device == 0x0015)
182 xhci->quirks |= XHCI_RESET_ON_RESUME; 185 xhci->quirks |= XHCI_RESET_ON_RESUME;
183 if (pdev->vendor == PCI_VENDOR_ID_VIA) 186 if (pdev->vendor == PCI_VENDOR_ID_VIA)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 6eb87c6e4d24..c5cbc685c691 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -3112,7 +3112,7 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred,
3112{ 3112{
3113 u32 maxp, total_packet_count; 3113 u32 maxp, total_packet_count;
3114 3114
3115 /* MTK xHCI is mostly 0.97 but contains some features from 1.0 */ 3115 /* MTK xHCI 0.96 contains some features from 1.0 */
3116 if (xhci->hci_version < 0x100 && !(xhci->quirks & XHCI_MTK_HOST)) 3116 if (xhci->hci_version < 0x100 && !(xhci->quirks & XHCI_MTK_HOST))
3117 return ((td_total_len - transferred) >> 10); 3117 return ((td_total_len - transferred) >> 10);
3118 3118
@@ -3121,8 +3121,8 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred,
3121 trb_buff_len == td_total_len) 3121 trb_buff_len == td_total_len)
3122 return 0; 3122 return 0;
3123 3123
3124 /* for MTK xHCI, TD size doesn't include this TRB */ 3124 /* for MTK xHCI 0.96, TD size include this TRB, but not in 1.x */
3125 if (xhci->quirks & XHCI_MTK_HOST) 3125 if ((xhci->quirks & XHCI_MTK_HOST) && (xhci->hci_version < 0x100))
3126 trb_buff_len = 0; 3126 trb_buff_len = 0;
3127 3127
3128 maxp = usb_endpoint_maxp(&urb->ep->desc); 3128 maxp = usb_endpoint_maxp(&urb->ep->desc);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 2424d3020ca3..da6dbe3ebd8b 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -3525,8 +3525,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
3525 struct xhci_slot_ctx *slot_ctx; 3525 struct xhci_slot_ctx *slot_ctx;
3526 int i, ret; 3526 int i, ret;
3527 3527
3528 xhci_debugfs_remove_slot(xhci, udev->slot_id);
3529
3530#ifndef CONFIG_USB_DEFAULT_PERSIST 3528#ifndef CONFIG_USB_DEFAULT_PERSIST
3531 /* 3529 /*
3532 * We called pm_runtime_get_noresume when the device was attached. 3530 * We called pm_runtime_get_noresume when the device was attached.
@@ -3555,8 +3553,10 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
3555 } 3553 }
3556 3554
3557 ret = xhci_disable_slot(xhci, udev->slot_id); 3555 ret = xhci_disable_slot(xhci, udev->slot_id);
3558 if (ret) 3556 if (ret) {
3557 xhci_debugfs_remove_slot(xhci, udev->slot_id);
3559 xhci_free_virt_device(xhci, udev->slot_id); 3558 xhci_free_virt_device(xhci, udev->slot_id);
3559 }
3560} 3560}
3561 3561
3562int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) 3562int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id)
diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
index 0397606a211b..6c036de63272 100644
--- a/drivers/usb/musb/da8xx.c
+++ b/drivers/usb/musb/da8xx.c
@@ -284,7 +284,15 @@ static irqreturn_t da8xx_musb_interrupt(int irq, void *hci)
284 musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE; 284 musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
285 portstate(musb->port1_status |= USB_PORT_STAT_POWER); 285 portstate(musb->port1_status |= USB_PORT_STAT_POWER);
286 del_timer(&musb->dev_timer); 286 del_timer(&musb->dev_timer);
287 } else { 287 } else if (!(musb->int_usb & MUSB_INTR_BABBLE)) {
288 /*
289 * When babble condition happens, drvvbus interrupt
290 * is also generated. Ignore this drvvbus interrupt
291 * and let babble interrupt handler recovers the
292 * controller; otherwise, the host-mode flag is lost
293 * due to the MUSB_DEV_MODE() call below and babble
294 * recovery logic will not be called.
295 */
288 musb->is_active = 0; 296 musb->is_active = 0;
289 MUSB_DEV_MODE(musb); 297 MUSB_DEV_MODE(musb);
290 otg->default_a = 0; 298 otg->default_a = 0;
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 1aba9105b369..fc68952c994a 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1013,6 +1013,7 @@ static const struct usb_device_id id_table_combined[] = {
1013 .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, 1013 .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
1014 { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, 1014 { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) },
1015 { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, 1015 { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) },
1016 { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) },
1016 { } /* Terminating entry */ 1017 { } /* Terminating entry */
1017}; 1018};
1018 1019
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 4faa09fe308c..8b4ecd2bd297 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -915,6 +915,12 @@
915#define ICPDAS_I7563U_PID 0x0105 915#define ICPDAS_I7563U_PID 0x0105
916 916
917/* 917/*
918 * Airbus Defence and Space
919 */
920#define AIRBUS_DS_VID 0x1e8e /* Vendor ID */
921#define AIRBUS_DS_P8GR 0x6001 /* Tetra P8GR */
922
923/*
918 * RT Systems programming cables for various ham radios 924 * RT Systems programming cables for various ham radios
919 */ 925 */
920#define RTSYSTEMS_VID 0x2100 /* Vendor ID */ 926#define RTSYSTEMS_VID 0x2100 /* Vendor ID */
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 3b3513874cfd..b6320e3be429 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -233,6 +233,8 @@ static void option_instat_callback(struct urb *urb);
233/* These Quectel products use Qualcomm's vendor ID */ 233/* These Quectel products use Qualcomm's vendor ID */
234#define QUECTEL_PRODUCT_UC20 0x9003 234#define QUECTEL_PRODUCT_UC20 0x9003
235#define QUECTEL_PRODUCT_UC15 0x9090 235#define QUECTEL_PRODUCT_UC15 0x9090
236/* These Yuga products use Qualcomm's vendor ID */
237#define YUGA_PRODUCT_CLM920_NC5 0x9625
236 238
237#define QUECTEL_VENDOR_ID 0x2c7c 239#define QUECTEL_VENDOR_ID 0x2c7c
238/* These Quectel products use Quectel's vendor ID */ 240/* These Quectel products use Quectel's vendor ID */
@@ -280,6 +282,7 @@ static void option_instat_callback(struct urb *urb);
280#define TELIT_PRODUCT_LE922_USBCFG3 0x1043 282#define TELIT_PRODUCT_LE922_USBCFG3 0x1043
281#define TELIT_PRODUCT_LE922_USBCFG5 0x1045 283#define TELIT_PRODUCT_LE922_USBCFG5 0x1045
282#define TELIT_PRODUCT_ME910 0x1100 284#define TELIT_PRODUCT_ME910 0x1100
285#define TELIT_PRODUCT_ME910_DUAL_MODEM 0x1101
283#define TELIT_PRODUCT_LE920 0x1200 286#define TELIT_PRODUCT_LE920 0x1200
284#define TELIT_PRODUCT_LE910 0x1201 287#define TELIT_PRODUCT_LE910 0x1201
285#define TELIT_PRODUCT_LE910_USBCFG4 0x1206 288#define TELIT_PRODUCT_LE910_USBCFG4 0x1206
@@ -645,6 +648,11 @@ static const struct option_blacklist_info telit_me910_blacklist = {
645 .reserved = BIT(1) | BIT(3), 648 .reserved = BIT(1) | BIT(3),
646}; 649};
647 650
651static const struct option_blacklist_info telit_me910_dual_modem_blacklist = {
652 .sendsetup = BIT(0),
653 .reserved = BIT(3),
654};
655
648static const struct option_blacklist_info telit_le910_blacklist = { 656static const struct option_blacklist_info telit_le910_blacklist = {
649 .sendsetup = BIT(0), 657 .sendsetup = BIT(0),
650 .reserved = BIT(1) | BIT(2), 658 .reserved = BIT(1) | BIT(2),
@@ -674,6 +682,10 @@ static const struct option_blacklist_info cinterion_rmnet2_blacklist = {
674 .reserved = BIT(4) | BIT(5), 682 .reserved = BIT(4) | BIT(5),
675}; 683};
676 684
685static const struct option_blacklist_info yuga_clm920_nc5_blacklist = {
686 .reserved = BIT(1) | BIT(4),
687};
688
677static const struct usb_device_id option_ids[] = { 689static const struct usb_device_id option_ids[] = {
678 { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, 690 { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
679 { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, 691 { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1178,6 +1190,9 @@ static const struct usb_device_id option_ids[] = {
1178 { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)}, 1190 { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)},
1179 { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20), 1191 { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20),
1180 .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, 1192 .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
1193 /* Yuga products use Qualcomm vendor ID */
1194 { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5),
1195 .driver_info = (kernel_ulong_t)&yuga_clm920_nc5_blacklist },
1181 /* Quectel products using Quectel vendor ID */ 1196 /* Quectel products using Quectel vendor ID */
1182 { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21), 1197 { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21),
1183 .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, 1198 .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
@@ -1244,6 +1259,8 @@ static const struct usb_device_id option_ids[] = {
1244 .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, 1259 .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
1245 { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), 1260 { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
1246 .driver_info = (kernel_ulong_t)&telit_me910_blacklist }, 1261 .driver_info = (kernel_ulong_t)&telit_me910_blacklist },
1262 { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
1263 .driver_info = (kernel_ulong_t)&telit_me910_dual_modem_blacklist },
1247 { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), 1264 { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
1248 .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, 1265 .driver_info = (kernel_ulong_t)&telit_le910_blacklist },
1249 { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), 1266 { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4),
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index e3892541a489..613f91add03d 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = {
162 {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */ 162 {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */
163 {DEVICE_SWI(0x1199, 0x907a)}, /* Sierra Wireless EM74xx QDL */ 163 {DEVICE_SWI(0x1199, 0x907a)}, /* Sierra Wireless EM74xx QDL */
164 {DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */ 164 {DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */
165 {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */
166 {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */
165 {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ 167 {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
166 {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ 168 {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
167 {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ 169 {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
@@ -342,6 +344,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
342 break; 344 break;
343 case 2: 345 case 2:
344 dev_dbg(dev, "NMEA GPS interface found\n"); 346 dev_dbg(dev, "NMEA GPS interface found\n");
347 sendsetup = true;
345 break; 348 break;
346 case 3: 349 case 3:
347 dev_dbg(dev, "Modem port found\n"); 350 dev_dbg(dev, "Modem port found\n");
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 2968046e7c05..f72d045ee9ef 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2100,6 +2100,13 @@ UNUSUAL_DEV( 0x152d, 0x0567, 0x0114, 0x0116,
2100 USB_SC_DEVICE, USB_PR_DEVICE, NULL, 2100 USB_SC_DEVICE, USB_PR_DEVICE, NULL,
2101 US_FL_BROKEN_FUA ), 2101 US_FL_BROKEN_FUA ),
2102 2102
2103/* Reported by David Kozub <zub@linux.fjfi.cvut.cz> */
2104UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
2105 "JMicron",
2106 "JMS567",
2107 USB_SC_DEVICE, USB_PR_DEVICE, NULL,
2108 US_FL_BROKEN_FUA),
2109
2103/* 2110/*
2104 * Reported by Alexandre Oliva <oliva@lsd.ic.unicamp.br> 2111 * Reported by Alexandre Oliva <oliva@lsd.ic.unicamp.br>
2105 * JMicron responds to USN and several other SCSI ioctls with a 2112 * JMicron responds to USN and several other SCSI ioctls with a
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index d520374a824e..e6127fb21c12 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -129,6 +129,13 @@ UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999,
129 USB_SC_DEVICE, USB_PR_DEVICE, NULL, 129 USB_SC_DEVICE, USB_PR_DEVICE, NULL,
130 US_FL_BROKEN_FUA | US_FL_NO_REPORT_OPCODES), 130 US_FL_BROKEN_FUA | US_FL_NO_REPORT_OPCODES),
131 131
132/* Reported-by: David Kozub <zub@linux.fjfi.cvut.cz> */
133UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
134 "JMicron",
135 "JMS567",
136 USB_SC_DEVICE, USB_PR_DEVICE, NULL,
137 US_FL_BROKEN_FUA),
138
132/* Reported-by: Hans de Goede <hdegoede@redhat.com> */ 139/* Reported-by: Hans de Goede <hdegoede@redhat.com> */
133UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999, 140UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
134 "VIA", 141 "VIA",
diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
index a3df8ee82faf..e31a6f204397 100644
--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -149,8 +149,7 @@ static void stub_shutdown_connection(struct usbip_device *ud)
149 * step 1? 149 * step 1?
150 */ 150 */
151 if (ud->tcp_socket) { 151 if (ud->tcp_socket) {
152 dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n", 152 dev_dbg(&sdev->udev->dev, "shutdown sockfd %d\n", ud->sockfd);
153 ud->tcp_socket);
154 kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR); 153 kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
155 } 154 }
156 155
diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c
index 4f48b306713f..c31c8402a0c5 100644
--- a/drivers/usb/usbip/stub_main.c
+++ b/drivers/usb/usbip/stub_main.c
@@ -237,11 +237,12 @@ void stub_device_cleanup_urbs(struct stub_device *sdev)
237 struct stub_priv *priv; 237 struct stub_priv *priv;
238 struct urb *urb; 238 struct urb *urb;
239 239
240 dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev); 240 dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n");
241 241
242 while ((priv = stub_priv_pop(sdev))) { 242 while ((priv = stub_priv_pop(sdev))) {
243 urb = priv->urb; 243 urb = priv->urb;
244 dev_dbg(&sdev->udev->dev, "free urb %p\n", urb); 244 dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n",
245 priv->seqnum);
245 usb_kill_urb(urb); 246 usb_kill_urb(urb);
246 247
247 kmem_cache_free(stub_priv_cache, priv); 248 kmem_cache_free(stub_priv_cache, priv);
diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c
index 536e037f541f..6c5a59313999 100644
--- a/drivers/usb/usbip/stub_rx.c
+++ b/drivers/usb/usbip/stub_rx.c
@@ -211,9 +211,6 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev,
211 if (priv->seqnum != pdu->u.cmd_unlink.seqnum) 211 if (priv->seqnum != pdu->u.cmd_unlink.seqnum)
212 continue; 212 continue;
213 213
214 dev_info(&priv->urb->dev->dev, "unlink urb %p\n",
215 priv->urb);
216
217 /* 214 /*
218 * This matched urb is not completed yet (i.e., be in 215 * This matched urb is not completed yet (i.e., be in
219 * flight in usb hcd hardware/driver). Now we are 216 * flight in usb hcd hardware/driver). Now we are
@@ -252,8 +249,8 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev,
252 ret = usb_unlink_urb(priv->urb); 249 ret = usb_unlink_urb(priv->urb);
253 if (ret != -EINPROGRESS) 250 if (ret != -EINPROGRESS)
254 dev_err(&priv->urb->dev->dev, 251 dev_err(&priv->urb->dev->dev,
255 "failed to unlink a urb %p, ret %d\n", 252 "failed to unlink a urb # %lu, ret %d\n",
256 priv->urb, ret); 253 priv->seqnum, ret);
257 254
258 return 0; 255 return 0;
259 } 256 }
@@ -322,23 +319,26 @@ static struct stub_priv *stub_priv_alloc(struct stub_device *sdev,
322 return priv; 319 return priv;
323} 320}
324 321
325static int get_pipe(struct stub_device *sdev, int epnum, int dir) 322static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu)
326{ 323{
327 struct usb_device *udev = sdev->udev; 324 struct usb_device *udev = sdev->udev;
328 struct usb_host_endpoint *ep; 325 struct usb_host_endpoint *ep;
329 struct usb_endpoint_descriptor *epd = NULL; 326 struct usb_endpoint_descriptor *epd = NULL;
327 int epnum = pdu->base.ep;
328 int dir = pdu->base.direction;
329
330 if (epnum < 0 || epnum > 15)
331 goto err_ret;
330 332
331 if (dir == USBIP_DIR_IN) 333 if (dir == USBIP_DIR_IN)
332 ep = udev->ep_in[epnum & 0x7f]; 334 ep = udev->ep_in[epnum & 0x7f];
333 else 335 else
334 ep = udev->ep_out[epnum & 0x7f]; 336 ep = udev->ep_out[epnum & 0x7f];
335 if (!ep) { 337 if (!ep)
336 dev_err(&sdev->udev->dev, "no such endpoint?, %d\n", 338 goto err_ret;
337 epnum);
338 BUG();
339 }
340 339
341 epd = &ep->desc; 340 epd = &ep->desc;
341
342 if (usb_endpoint_xfer_control(epd)) { 342 if (usb_endpoint_xfer_control(epd)) {
343 if (dir == USBIP_DIR_OUT) 343 if (dir == USBIP_DIR_OUT)
344 return usb_sndctrlpipe(udev, epnum); 344 return usb_sndctrlpipe(udev, epnum);
@@ -361,15 +361,31 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir)
361 } 361 }
362 362
363 if (usb_endpoint_xfer_isoc(epd)) { 363 if (usb_endpoint_xfer_isoc(epd)) {
364 /* validate packet size and number of packets */
365 unsigned int maxp, packets, bytes;
366
367 maxp = usb_endpoint_maxp(epd);
368 maxp *= usb_endpoint_maxp_mult(epd);
369 bytes = pdu->u.cmd_submit.transfer_buffer_length;
370 packets = DIV_ROUND_UP(bytes, maxp);
371
372 if (pdu->u.cmd_submit.number_of_packets < 0 ||
373 pdu->u.cmd_submit.number_of_packets > packets) {
374 dev_err(&sdev->udev->dev,
375 "CMD_SUBMIT: isoc invalid num packets %d\n",
376 pdu->u.cmd_submit.number_of_packets);
377 return -1;
378 }
364 if (dir == USBIP_DIR_OUT) 379 if (dir == USBIP_DIR_OUT)
365 return usb_sndisocpipe(udev, epnum); 380 return usb_sndisocpipe(udev, epnum);
366 else 381 else
367 return usb_rcvisocpipe(udev, epnum); 382 return usb_rcvisocpipe(udev, epnum);
368 } 383 }
369 384
385err_ret:
370 /* NOT REACHED */ 386 /* NOT REACHED */
371 dev_err(&sdev->udev->dev, "get pipe, epnum %d\n", epnum); 387 dev_err(&sdev->udev->dev, "CMD_SUBMIT: invalid epnum %d\n", epnum);
372 return 0; 388 return -1;
373} 389}
374 390
375static void masking_bogus_flags(struct urb *urb) 391static void masking_bogus_flags(struct urb *urb)
@@ -433,7 +449,10 @@ static void stub_recv_cmd_submit(struct stub_device *sdev,
433 struct stub_priv *priv; 449 struct stub_priv *priv;
434 struct usbip_device *ud = &sdev->ud; 450 struct usbip_device *ud = &sdev->ud;
435 struct usb_device *udev = sdev->udev; 451 struct usb_device *udev = sdev->udev;
436 int pipe = get_pipe(sdev, pdu->base.ep, pdu->base.direction); 452 int pipe = get_pipe(sdev, pdu);
453
454 if (pipe == -1)
455 return;
437 456
438 priv = stub_priv_alloc(sdev, pdu); 457 priv = stub_priv_alloc(sdev, pdu);
439 if (!priv) 458 if (!priv)
diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c
index b18bce96c212..f0ec41a50cbc 100644
--- a/drivers/usb/usbip/stub_tx.c
+++ b/drivers/usb/usbip/stub_tx.c
@@ -88,7 +88,7 @@ void stub_complete(struct urb *urb)
88 /* link a urb to the queue of tx. */ 88 /* link a urb to the queue of tx. */
89 spin_lock_irqsave(&sdev->priv_lock, flags); 89 spin_lock_irqsave(&sdev->priv_lock, flags);
90 if (sdev->ud.tcp_socket == NULL) { 90 if (sdev->ud.tcp_socket == NULL) {
91 usbip_dbg_stub_tx("ignore urb for closed connection %p", urb); 91 usbip_dbg_stub_tx("ignore urb for closed connection\n");
92 /* It will be freed in stub_device_cleanup_urbs(). */ 92 /* It will be freed in stub_device_cleanup_urbs(). */
93 } else if (priv->unlinking) { 93 } else if (priv->unlinking) {
94 stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status); 94 stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status);
@@ -167,6 +167,13 @@ static int stub_send_ret_submit(struct stub_device *sdev)
167 memset(&pdu_header, 0, sizeof(pdu_header)); 167 memset(&pdu_header, 0, sizeof(pdu_header));
168 memset(&msg, 0, sizeof(msg)); 168 memset(&msg, 0, sizeof(msg));
169 169
170 if (urb->actual_length > 0 && !urb->transfer_buffer) {
171 dev_err(&sdev->udev->dev,
172 "urb: actual_length %d transfer_buffer null\n",
173 urb->actual_length);
174 return -1;
175 }
176
170 if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) 177 if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS)
171 iovnum = 2 + urb->number_of_packets; 178 iovnum = 2 + urb->number_of_packets;
172 else 179 else
@@ -183,8 +190,8 @@ static int stub_send_ret_submit(struct stub_device *sdev)
183 190
184 /* 1. setup usbip_header */ 191 /* 1. setup usbip_header */
185 setup_ret_submit_pdu(&pdu_header, urb); 192 setup_ret_submit_pdu(&pdu_header, urb);
186 usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n", 193 usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
187 pdu_header.base.seqnum, urb); 194 pdu_header.base.seqnum);
188 usbip_header_correct_endian(&pdu_header, 1); 195 usbip_header_correct_endian(&pdu_header, 1);
189 196
190 iov[iovnum].iov_base = &pdu_header; 197 iov[iovnum].iov_base = &pdu_header;
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index f7978933b402..7b219d9109b4 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -317,26 +317,20 @@ int usbip_recv(struct socket *sock, void *buf, int size)
317 struct msghdr msg = {.msg_flags = MSG_NOSIGNAL}; 317 struct msghdr msg = {.msg_flags = MSG_NOSIGNAL};
318 int total = 0; 318 int total = 0;
319 319
320 if (!sock || !buf || !size)
321 return -EINVAL;
322
320 iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size); 323 iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size);
321 324
322 usbip_dbg_xmit("enter\n"); 325 usbip_dbg_xmit("enter\n");
323 326
324 if (!sock || !buf || !size) {
325 pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf,
326 size);
327 return -EINVAL;
328 }
329
330 do { 327 do {
331 int sz = msg_data_left(&msg); 328 msg_data_left(&msg);
332 sock->sk->sk_allocation = GFP_NOIO; 329 sock->sk->sk_allocation = GFP_NOIO;
333 330
334 result = sock_recvmsg(sock, &msg, MSG_WAITALL); 331 result = sock_recvmsg(sock, &msg, MSG_WAITALL);
335 if (result <= 0) { 332 if (result <= 0)
336 pr_debug("receive sock %p buf %p size %u ret %d total %d\n",
337 sock, buf + total, sz, result, total);
338 goto err; 333 goto err;
339 }
340 334
341 total += result; 335 total += result;
342 } while (msg_data_left(&msg)); 336 } while (msg_data_left(&msg));
diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h
index e5de35c8c505..473fb8a87289 100644
--- a/drivers/usb/usbip/usbip_common.h
+++ b/drivers/usb/usbip/usbip_common.h
@@ -256,6 +256,7 @@ struct usbip_device {
256 /* lock for status */ 256 /* lock for status */
257 spinlock_t lock; 257 spinlock_t lock;
258 258
259 int sockfd;
259 struct socket *tcp_socket; 260 struct socket *tcp_socket;
260 261
261 struct task_struct *tcp_rx; 262 struct task_struct *tcp_rx;
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index 6b3278c4b72a..c3e1008aa491 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -656,9 +656,6 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
656 struct vhci_device *vdev; 656 struct vhci_device *vdev;
657 unsigned long flags; 657 unsigned long flags;
658 658
659 usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n",
660 hcd, urb, mem_flags);
661
662 if (portnum > VHCI_HC_PORTS) { 659 if (portnum > VHCI_HC_PORTS) {
663 pr_err("invalid port number %d\n", portnum); 660 pr_err("invalid port number %d\n", portnum);
664 return -ENODEV; 661 return -ENODEV;
@@ -822,8 +819,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
822 struct vhci_device *vdev; 819 struct vhci_device *vdev;
823 unsigned long flags; 820 unsigned long flags;
824 821
825 pr_info("dequeue a urb %p\n", urb);
826
827 spin_lock_irqsave(&vhci->lock, flags); 822 spin_lock_irqsave(&vhci->lock, flags);
828 823
829 priv = urb->hcpriv; 824 priv = urb->hcpriv;
@@ -851,7 +846,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
851 /* tcp connection is closed */ 846 /* tcp connection is closed */
852 spin_lock(&vdev->priv_lock); 847 spin_lock(&vdev->priv_lock);
853 848
854 pr_info("device %p seems to be disconnected\n", vdev);
855 list_del(&priv->list); 849 list_del(&priv->list);
856 kfree(priv); 850 kfree(priv);
857 urb->hcpriv = NULL; 851 urb->hcpriv = NULL;
@@ -863,8 +857,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
863 * vhci_rx will receive RET_UNLINK and give back the URB. 857 * vhci_rx will receive RET_UNLINK and give back the URB.
864 * Otherwise, we give back it here. 858 * Otherwise, we give back it here.
865 */ 859 */
866 pr_info("gives back urb %p\n", urb);
867
868 usb_hcd_unlink_urb_from_ep(hcd, urb); 860 usb_hcd_unlink_urb_from_ep(hcd, urb);
869 861
870 spin_unlock_irqrestore(&vhci->lock, flags); 862 spin_unlock_irqrestore(&vhci->lock, flags);
@@ -892,8 +884,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
892 884
893 unlink->unlink_seqnum = priv->seqnum; 885 unlink->unlink_seqnum = priv->seqnum;
894 886
895 pr_info("device %p seems to be still connected\n", vdev);
896
897 /* send cmd_unlink and try to cancel the pending URB in the 887 /* send cmd_unlink and try to cancel the pending URB in the
898 * peer */ 888 * peer */
899 list_add_tail(&unlink->list, &vdev->unlink_tx); 889 list_add_tail(&unlink->list, &vdev->unlink_tx);
@@ -975,7 +965,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud)
975 965
976 /* need this? see stub_dev.c */ 966 /* need this? see stub_dev.c */
977 if (ud->tcp_socket) { 967 if (ud->tcp_socket) {
978 pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket); 968 pr_debug("shutdown tcp_socket %d\n", ud->sockfd);
979 kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR); 969 kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
980 } 970 }
981 971
diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c
index 90577e8b2282..112ebb90d8c9 100644
--- a/drivers/usb/usbip/vhci_rx.c
+++ b/drivers/usb/usbip/vhci_rx.c
@@ -23,24 +23,23 @@ struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum)
23 urb = priv->urb; 23 urb = priv->urb;
24 status = urb->status; 24 status = urb->status;
25 25
26 usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n", 26 usbip_dbg_vhci_rx("find urb seqnum %u\n", seqnum);
27 urb, priv, seqnum);
28 27
29 switch (status) { 28 switch (status) {
30 case -ENOENT: 29 case -ENOENT:
31 /* fall through */ 30 /* fall through */
32 case -ECONNRESET: 31 case -ECONNRESET:
33 dev_info(&urb->dev->dev, 32 dev_dbg(&urb->dev->dev,
34 "urb %p was unlinked %ssynchronuously.\n", urb, 33 "urb seq# %u was unlinked %ssynchronuously\n",
35 status == -ENOENT ? "" : "a"); 34 seqnum, status == -ENOENT ? "" : "a");
36 break; 35 break;
37 case -EINPROGRESS: 36 case -EINPROGRESS:
38 /* no info output */ 37 /* no info output */
39 break; 38 break;
40 default: 39 default:
41 dev_info(&urb->dev->dev, 40 dev_dbg(&urb->dev->dev,
42 "urb %p may be in a error, status %d\n", urb, 41 "urb seq# %u may be in a error, status %d\n",
43 status); 42 seqnum, status);
44 } 43 }
45 44
46 list_del(&priv->list); 45 list_del(&priv->list);
@@ -67,8 +66,8 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
67 spin_unlock_irqrestore(&vdev->priv_lock, flags); 66 spin_unlock_irqrestore(&vdev->priv_lock, flags);
68 67
69 if (!urb) { 68 if (!urb) {
70 pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum); 69 pr_err("cannot find a urb of seqnum %u max seqnum %d\n",
71 pr_info("max seqnum %d\n", 70 pdu->base.seqnum,
72 atomic_read(&vhci_hcd->seqnum)); 71 atomic_read(&vhci_hcd->seqnum));
73 usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); 72 usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
74 return; 73 return;
@@ -91,7 +90,7 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
91 if (usbip_dbg_flag_vhci_rx) 90 if (usbip_dbg_flag_vhci_rx)
92 usbip_dump_urb(urb); 91 usbip_dump_urb(urb);
93 92
94 usbip_dbg_vhci_rx("now giveback urb %p\n", urb); 93 usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum);
95 94
96 spin_lock_irqsave(&vhci->lock, flags); 95 spin_lock_irqsave(&vhci->lock, flags);
97 usb_hcd_unlink_urb_from_ep(vhci_hcd_to_hcd(vhci_hcd), urb); 96 usb_hcd_unlink_urb_from_ep(vhci_hcd_to_hcd(vhci_hcd), urb);
@@ -158,7 +157,7 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev,
158 pr_info("the urb (seqnum %d) was already given back\n", 157 pr_info("the urb (seqnum %d) was already given back\n",
159 pdu->base.seqnum); 158 pdu->base.seqnum);
160 } else { 159 } else {
161 usbip_dbg_vhci_rx("now giveback urb %p\n", urb); 160 usbip_dbg_vhci_rx("now giveback urb %d\n", pdu->base.seqnum);
162 161
163 /* If unlink is successful, status is -ECONNRESET */ 162 /* If unlink is successful, status is -ECONNRESET */
164 urb->status = pdu->u.ret_unlink.status; 163 urb->status = pdu->u.ret_unlink.status;
diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c
index e78f7472cac4..091f76b7196d 100644
--- a/drivers/usb/usbip/vhci_sysfs.c
+++ b/drivers/usb/usbip/vhci_sysfs.c
@@ -17,15 +17,20 @@
17 17
18/* 18/*
19 * output example: 19 * output example:
20 * hub port sta spd dev socket local_busid 20 * hub port sta spd dev sockfd local_busid
21 * hs 0000 004 000 00000000 c5a7bb80 1-2.3 21 * hs 0000 004 000 00000000 3 1-2.3
22 * ................................................ 22 * ................................................
23 * ss 0008 004 000 00000000 d8cee980 2-3.4 23 * ss 0008 004 000 00000000 4 2-3.4
24 * ................................................ 24 * ................................................
25 * 25 *
26 * IP address can be retrieved from a socket pointer address by looking 26 * Output includes socket fd instead of socket pointer address to avoid
27 * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a 27 * leaking kernel memory address in:
28 * port number and its peer IP address. 28 * /sys/devices/platform/vhci_hcd.0/status and in debug output.
29 * The socket pointer address is not used at the moment and it was made
30 * visible as a convenient way to find IP address from socket pointer
31 * address by looking up /proc/net/{tcp,tcp6}. As this opens a security
32 * hole, the change is made to use sockfd instead.
33 *
29 */ 34 */
30static void port_show_vhci(char **out, int hub, int port, struct vhci_device *vdev) 35static void port_show_vhci(char **out, int hub, int port, struct vhci_device *vdev)
31{ 36{
@@ -39,8 +44,8 @@ static void port_show_vhci(char **out, int hub, int port, struct vhci_device *vd
39 if (vdev->ud.status == VDEV_ST_USED) { 44 if (vdev->ud.status == VDEV_ST_USED) {
40 *out += sprintf(*out, "%03u %08x ", 45 *out += sprintf(*out, "%03u %08x ",
41 vdev->speed, vdev->devid); 46 vdev->speed, vdev->devid);
42 *out += sprintf(*out, "%16p %s", 47 *out += sprintf(*out, "%u %s",
43 vdev->ud.tcp_socket, 48 vdev->ud.sockfd,
44 dev_name(&vdev->udev->dev)); 49 dev_name(&vdev->udev->dev));
45 50
46 } else { 51 } else {
@@ -160,7 +165,8 @@ static ssize_t nports_show(struct device *dev, struct device_attribute *attr,
160 char *s = out; 165 char *s = out;
161 166
162 /* 167 /*
163 * Half the ports are for SPEED_HIGH and half for SPEED_SUPER, thus the * 2. 168 * Half the ports are for SPEED_HIGH and half for SPEED_SUPER,
169 * thus the * 2.
164 */ 170 */
165 out += sprintf(out, "%d\n", VHCI_PORTS * vhci_num_controllers); 171 out += sprintf(out, "%d\n", VHCI_PORTS * vhci_num_controllers);
166 return out - s; 172 return out - s;
@@ -366,6 +372,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
366 372
367 vdev->devid = devid; 373 vdev->devid = devid;
368 vdev->speed = speed; 374 vdev->speed = speed;
375 vdev->ud.sockfd = sockfd;
369 vdev->ud.tcp_socket = socket; 376 vdev->ud.tcp_socket = socket;
370 vdev->ud.status = VDEV_ST_NOTASSIGNED; 377 vdev->ud.status = VDEV_ST_NOTASSIGNED;
371 378
diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c
index d625a2ff4b71..9aed15a358b7 100644
--- a/drivers/usb/usbip/vhci_tx.c
+++ b/drivers/usb/usbip/vhci_tx.c
@@ -69,7 +69,8 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev)
69 memset(&msg, 0, sizeof(msg)); 69 memset(&msg, 0, sizeof(msg));
70 memset(&iov, 0, sizeof(iov)); 70 memset(&iov, 0, sizeof(iov));
71 71
72 usbip_dbg_vhci_tx("setup txdata urb %p\n", urb); 72 usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n",
73 priv->seqnum);
73 74
74 /* 1. setup usbip_header */ 75 /* 1. setup usbip_header */
75 setup_cmd_submit_pdu(&pdu_header, urb); 76 setup_cmd_submit_pdu(&pdu_header, urb);
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index a9192fe4f345..c92131edfaba 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -522,10 +522,8 @@ static int virtio_mmio_probe(struct platform_device *pdev)
522 return -EBUSY; 522 return -EBUSY;
523 523
524 vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL); 524 vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL);
525 if (!vm_dev) { 525 if (!vm_dev)
526 rc = -ENOMEM; 526 return -ENOMEM;
527 goto free_mem;
528 }
529 527
530 vm_dev->vdev.dev.parent = &pdev->dev; 528 vm_dev->vdev.dev.parent = &pdev->dev;
531 vm_dev->vdev.dev.release = virtio_mmio_release_dev; 529 vm_dev->vdev.dev.release = virtio_mmio_release_dev;
@@ -535,17 +533,14 @@ static int virtio_mmio_probe(struct platform_device *pdev)
535 spin_lock_init(&vm_dev->lock); 533 spin_lock_init(&vm_dev->lock);
536 534
537 vm_dev->base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem)); 535 vm_dev->base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
538 if (vm_dev->base == NULL) { 536 if (vm_dev->base == NULL)
539 rc = -EFAULT; 537 return -EFAULT;
540 goto free_vmdev;
541 }
542 538
543 /* Check magic value */ 539 /* Check magic value */
544 magic = readl(vm_dev->base + VIRTIO_MMIO_MAGIC_VALUE); 540 magic = readl(vm_dev->base + VIRTIO_MMIO_MAGIC_VALUE);
545 if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) { 541 if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) {
546 dev_warn(&pdev->dev, "Wrong magic value 0x%08lx!\n", magic); 542 dev_warn(&pdev->dev, "Wrong magic value 0x%08lx!\n", magic);
547 rc = -ENODEV; 543 return -ENODEV;
548 goto unmap;
549 } 544 }
550 545
551 /* Check device version */ 546 /* Check device version */
@@ -553,8 +548,7 @@ static int virtio_mmio_probe(struct platform_device *pdev)
553 if (vm_dev->version < 1 || vm_dev->version > 2) { 548 if (vm_dev->version < 1 || vm_dev->version > 2) {
554 dev_err(&pdev->dev, "Version %ld not supported!\n", 549 dev_err(&pdev->dev, "Version %ld not supported!\n",
555 vm_dev->version); 550 vm_dev->version);
556 rc = -ENXIO; 551 return -ENXIO;
557 goto unmap;
558 } 552 }
559 553
560 vm_dev->vdev.id.device = readl(vm_dev->base + VIRTIO_MMIO_DEVICE_ID); 554 vm_dev->vdev.id.device = readl(vm_dev->base + VIRTIO_MMIO_DEVICE_ID);
@@ -563,8 +557,7 @@ static int virtio_mmio_probe(struct platform_device *pdev)
563 * virtio-mmio device with an ID 0 is a (dummy) placeholder 557 * virtio-mmio device with an ID 0 is a (dummy) placeholder
564 * with no function. End probing now with no error reported. 558 * with no function. End probing now with no error reported.
565 */ 559 */
566 rc = -ENODEV; 560 return -ENODEV;
567 goto unmap;
568 } 561 }
569 vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID); 562 vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID);
570 563
@@ -590,33 +583,15 @@ static int virtio_mmio_probe(struct platform_device *pdev)
590 platform_set_drvdata(pdev, vm_dev); 583 platform_set_drvdata(pdev, vm_dev);
591 584
592 rc = register_virtio_device(&vm_dev->vdev); 585 rc = register_virtio_device(&vm_dev->vdev);
593 if (rc) { 586 if (rc)
594 iounmap(vm_dev->base);
595 devm_release_mem_region(&pdev->dev, mem->start,
596 resource_size(mem));
597 put_device(&vm_dev->vdev.dev); 587 put_device(&vm_dev->vdev.dev);
598 } 588
599 return rc;
600unmap:
601 iounmap(vm_dev->base);
602free_mem:
603 devm_release_mem_region(&pdev->dev, mem->start,
604 resource_size(mem));
605free_vmdev:
606 devm_kfree(&pdev->dev, vm_dev);
607 return rc; 589 return rc;
608} 590}
609 591
610static int virtio_mmio_remove(struct platform_device *pdev) 592static int virtio_mmio_remove(struct platform_device *pdev)
611{ 593{
612 struct virtio_mmio_device *vm_dev = platform_get_drvdata(pdev); 594 struct virtio_mmio_device *vm_dev = platform_get_drvdata(pdev);
613 struct resource *mem;
614
615 iounmap(vm_dev->base);
616 mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
617 if (mem)
618 devm_release_mem_region(&pdev->dev, mem->start,
619 resource_size(mem));
620 unregister_virtio_device(&vm_dev->vdev); 595 unregister_virtio_device(&vm_dev->vdev);
621 596
622 return 0; 597 return 0;
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index d8dd54678ab7..e5d0c28372ea 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -269,7 +269,7 @@ config XEN_ACPI_HOTPLUG_CPU
269 269
270config XEN_ACPI_PROCESSOR 270config XEN_ACPI_PROCESSOR
271 tristate "Xen ACPI processor" 271 tristate "Xen ACPI processor"
272 depends on XEN && X86 && ACPI_PROCESSOR && CPU_FREQ 272 depends on XEN && XEN_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ
273 default m 273 default m
274 help 274 help
275 This ACPI processor uploads Power Management information to the Xen 275 This ACPI processor uploads Power Management information to the Xen
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index f77e499afddd..065f0b607373 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -257,10 +257,25 @@ static void release_memory_resource(struct resource *resource)
257 kfree(resource); 257 kfree(resource);
258} 258}
259 259
260/*
261 * Host memory not allocated to dom0. We can use this range for hotplug-based
262 * ballooning.
263 *
264 * It's a type-less resource. Setting IORESOURCE_MEM will make resource
265 * management algorithms (arch_remove_reservations()) look into guest e820,
266 * which we don't want.
267 */
268static struct resource hostmem_resource = {
269 .name = "Host RAM",
270};
271
272void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res)
273{}
274
260static struct resource *additional_memory_resource(phys_addr_t size) 275static struct resource *additional_memory_resource(phys_addr_t size)
261{ 276{
262 struct resource *res; 277 struct resource *res, *res_hostmem;
263 int ret; 278 int ret = -ENOMEM;
264 279
265 res = kzalloc(sizeof(*res), GFP_KERNEL); 280 res = kzalloc(sizeof(*res), GFP_KERNEL);
266 if (!res) 281 if (!res)
@@ -269,13 +284,42 @@ static struct resource *additional_memory_resource(phys_addr_t size)
269 res->name = "System RAM"; 284 res->name = "System RAM";
270 res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; 285 res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
271 286
272 ret = allocate_resource(&iomem_resource, res, 287 res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL);
273 size, 0, -1, 288 if (res_hostmem) {
274 PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); 289 /* Try to grab a range from hostmem */
275 if (ret < 0) { 290 res_hostmem->name = "Host memory";
276 pr_err("Cannot allocate new System RAM resource\n"); 291 ret = allocate_resource(&hostmem_resource, res_hostmem,
277 kfree(res); 292 size, 0, -1,
278 return NULL; 293 PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
294 }
295
296 if (!ret) {
297 /*
298 * Insert this resource into iomem. Because hostmem_resource
299 * tracks portion of guest e820 marked as UNUSABLE noone else
300 * should try to use it.
301 */
302 res->start = res_hostmem->start;
303 res->end = res_hostmem->end;
304 ret = insert_resource(&iomem_resource, res);
305 if (ret < 0) {
306 pr_err("Can't insert iomem_resource [%llx - %llx]\n",
307 res->start, res->end);
308 release_memory_resource(res_hostmem);
309 res_hostmem = NULL;
310 res->start = res->end = 0;
311 }
312 }
313
314 if (ret) {
315 ret = allocate_resource(&iomem_resource, res,
316 size, 0, -1,
317 PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
318 if (ret < 0) {
319 pr_err("Cannot allocate new System RAM resource\n");
320 kfree(res);
321 return NULL;
322 }
279 } 323 }
280 324
281#ifdef CONFIG_SPARSEMEM 325#ifdef CONFIG_SPARSEMEM
@@ -287,6 +331,7 @@ static struct resource *additional_memory_resource(phys_addr_t size)
287 pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n", 331 pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
288 pfn, limit); 332 pfn, limit);
289 release_memory_resource(res); 333 release_memory_resource(res);
334 release_memory_resource(res_hostmem);
290 return NULL; 335 return NULL;
291 } 336 }
292 } 337 }
@@ -765,6 +810,8 @@ static int __init balloon_init(void)
765 set_online_page_callback(&xen_online_page); 810 set_online_page_callback(&xen_online_page);
766 register_memory_notifier(&xen_memory_nb); 811 register_memory_notifier(&xen_memory_nb);
767 register_sysctl_table(xen_root); 812 register_sysctl_table(xen_root);
813
814 arch_xen_balloon_init(&hostmem_resource);
768#endif 815#endif
769 816
770#ifdef CONFIG_XEN_PV 817#ifdef CONFIG_XEN_PV
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 8fc41705c7cd..961a12dc6dc8 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -170,7 +170,6 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
170 170
171 mutex_unlock(&sbi->wq_mutex); 171 mutex_unlock(&sbi->wq_mutex);
172 172
173 if (autofs4_write(sbi, pipe, &pkt, pktsz))
174 switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) { 173 switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) {
175 case 0: 174 case 0:
176 break; 175 break;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index ab69dcb70e8a..1b468250e947 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1440,6 +1440,29 @@ static int __close_session(struct ceph_mds_client *mdsc,
1440 return request_close_session(mdsc, session); 1440 return request_close_session(mdsc, session);
1441} 1441}
1442 1442
1443static bool drop_negative_children(struct dentry *dentry)
1444{
1445 struct dentry *child;
1446 bool all_negative = true;
1447
1448 if (!d_is_dir(dentry))
1449 goto out;
1450
1451 spin_lock(&dentry->d_lock);
1452 list_for_each_entry(child, &dentry->d_subdirs, d_child) {
1453 if (d_really_is_positive(child)) {
1454 all_negative = false;
1455 break;
1456 }
1457 }
1458 spin_unlock(&dentry->d_lock);
1459
1460 if (all_negative)
1461 shrink_dcache_parent(dentry);
1462out:
1463 return all_negative;
1464}
1465
1443/* 1466/*
1444 * Trim old(er) caps. 1467 * Trim old(er) caps.
1445 * 1468 *
@@ -1490,16 +1513,27 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1490 if ((used | wanted) & ~oissued & mine) 1513 if ((used | wanted) & ~oissued & mine)
1491 goto out; /* we need these caps */ 1514 goto out; /* we need these caps */
1492 1515
1493 session->s_trim_caps--;
1494 if (oissued) { 1516 if (oissued) {
1495 /* we aren't the only cap.. just remove us */ 1517 /* we aren't the only cap.. just remove us */
1496 __ceph_remove_cap(cap, true); 1518 __ceph_remove_cap(cap, true);
1519 session->s_trim_caps--;
1497 } else { 1520 } else {
1521 struct dentry *dentry;
1498 /* try dropping referring dentries */ 1522 /* try dropping referring dentries */
1499 spin_unlock(&ci->i_ceph_lock); 1523 spin_unlock(&ci->i_ceph_lock);
1500 d_prune_aliases(inode); 1524 dentry = d_find_any_alias(inode);
1501 dout("trim_caps_cb %p cap %p pruned, count now %d\n", 1525 if (dentry && drop_negative_children(dentry)) {
1502 inode, cap, atomic_read(&inode->i_count)); 1526 int count;
1527 dput(dentry);
1528 d_prune_aliases(inode);
1529 count = atomic_read(&inode->i_count);
1530 if (count == 1)
1531 session->s_trim_caps--;
1532 dout("trim_caps_cb %p cap %p pruned, count now %d\n",
1533 inode, cap, count);
1534 } else {
1535 dput(dentry);
1536 }
1503 return 0; 1537 return 0;
1504 } 1538 }
1505 1539
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e06740436b92..ed88ab8a4774 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1406,7 +1406,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
1406 } while (rc == -EAGAIN); 1406 } while (rc == -EAGAIN);
1407 1407
1408 if (rc) { 1408 if (rc) {
1409 cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc); 1409 if (rc != -ENOENT)
1410 cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc);
1410 goto out; 1411 goto out;
1411 } 1412 }
1412 1413
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5331631386a2..01346b8b6edb 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2678,27 +2678,27 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
2678 cifs_small_buf_release(req); 2678 cifs_small_buf_release(req);
2679 2679
2680 rsp = (struct smb2_read_rsp *)rsp_iov.iov_base; 2680 rsp = (struct smb2_read_rsp *)rsp_iov.iov_base;
2681 shdr = get_sync_hdr(rsp);
2682 2681
2683 if (shdr->Status == STATUS_END_OF_FILE) { 2682 if (rc) {
2683 if (rc != -ENODATA) {
2684 cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
2685 cifs_dbg(VFS, "Send error in read = %d\n", rc);
2686 }
2684 free_rsp_buf(resp_buftype, rsp_iov.iov_base); 2687 free_rsp_buf(resp_buftype, rsp_iov.iov_base);
2685 return 0; 2688 return rc == -ENODATA ? 0 : rc;
2686 } 2689 }
2687 2690
2688 if (rc) { 2691 *nbytes = le32_to_cpu(rsp->DataLength);
2689 cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); 2692 if ((*nbytes > CIFS_MAX_MSGSIZE) ||
2690 cifs_dbg(VFS, "Send error in read = %d\n", rc); 2693 (*nbytes > io_parms->length)) {
2691 } else { 2694 cifs_dbg(FYI, "bad length %d for count %d\n",
2692 *nbytes = le32_to_cpu(rsp->DataLength); 2695 *nbytes, io_parms->length);
2693 if ((*nbytes > CIFS_MAX_MSGSIZE) || 2696 rc = -EIO;
2694 (*nbytes > io_parms->length)) { 2697 *nbytes = 0;
2695 cifs_dbg(FYI, "bad length %d for count %d\n",
2696 *nbytes, io_parms->length);
2697 rc = -EIO;
2698 *nbytes = 0;
2699 }
2700 } 2698 }
2701 2699
2700 shdr = get_sync_hdr(rsp);
2701
2702 if (*buf) { 2702 if (*buf) {
2703 memcpy(*buf, (char *)shdr + rsp->DataOffset, *nbytes); 2703 memcpy(*buf, (char *)shdr + rsp->DataOffset, *nbytes);
2704 free_rsp_buf(resp_buftype, rsp_iov.iov_base); 2704 free_rsp_buf(resp_buftype, rsp_iov.iov_base);
diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig
index f937082f3244..58e2fe40b2a0 100644
--- a/fs/cramfs/Kconfig
+++ b/fs/cramfs/Kconfig
@@ -34,6 +34,7 @@ config CRAMFS_BLOCKDEV
34config CRAMFS_MTD 34config CRAMFS_MTD
35 bool "Support CramFs image directly mapped in physical memory" 35 bool "Support CramFs image directly mapped in physical memory"
36 depends on CRAMFS && MTD 36 depends on CRAMFS && MTD
37 depends on CRAMFS=m || MTD=y
37 default y if !CRAMFS_BLOCKDEV 38 default y if !CRAMFS_BLOCKDEV
38 help 39 help
39 This option allows the CramFs driver to load data directly from 40 This option allows the CramFs driver to load data directly from
diff --git a/fs/dax.c b/fs/dax.c
index 78b72c48374e..95981591977a 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -627,8 +627,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
627 627
628 if (pfn != pmd_pfn(*pmdp)) 628 if (pfn != pmd_pfn(*pmdp))
629 goto unlock_pmd; 629 goto unlock_pmd;
630 if (!pmd_dirty(*pmdp) 630 if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
631 && !pmd_access_permitted(*pmdp, WRITE))
632 goto unlock_pmd; 631 goto unlock_pmd;
633 632
634 flush_cache_page(vma, address, pfn); 633 flush_cache_page(vma, address, pfn);
diff --git a/fs/exec.c b/fs/exec.c
index 6be2aa0ab26f..5688b5e1b937 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1216,15 +1216,14 @@ killed:
1216 return -EAGAIN; 1216 return -EAGAIN;
1217} 1217}
1218 1218
1219char *get_task_comm(char *buf, struct task_struct *tsk) 1219char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
1220{ 1220{
1221 /* buf must be at least sizeof(tsk->comm) in size */
1222 task_lock(tsk); 1221 task_lock(tsk);
1223 strncpy(buf, tsk->comm, sizeof(tsk->comm)); 1222 strncpy(buf, tsk->comm, buf_size);
1224 task_unlock(tsk); 1223 task_unlock(tsk);
1225 return buf; 1224 return buf;
1226} 1225}
1227EXPORT_SYMBOL_GPL(get_task_comm); 1226EXPORT_SYMBOL_GPL(__get_task_comm);
1228 1227
1229/* 1228/*
1230 * These functions flushes out all traces of the currently running executable 1229 * These functions flushes out all traces of the currently running executable
@@ -1340,15 +1339,10 @@ void setup_new_exec(struct linux_binprm * bprm)
1340 * avoid bad behavior from the prior rlimits. This has to 1339 * avoid bad behavior from the prior rlimits. This has to
1341 * happen before arch_pick_mmap_layout(), which examines 1340 * happen before arch_pick_mmap_layout(), which examines
1342 * RLIMIT_STACK, but after the point of no return to avoid 1341 * RLIMIT_STACK, but after the point of no return to avoid
1343 * races from other threads changing the limits. This also 1342 * needing to clean up the change on failure.
1344 * must be protected from races with prlimit() calls.
1345 */ 1343 */
1346 task_lock(current->group_leader);
1347 if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM) 1344 if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
1348 current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM; 1345 current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
1349 if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM)
1350 current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM;
1351 task_unlock(current->group_leader);
1352 } 1346 }
1353 1347
1354 arch_pick_mmap_layout(current->mm); 1348 arch_pick_mmap_layout(current->mm);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 07bca11749d4..c941251ac0c0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4722,6 +4722,7 @@ retry:
4722 EXT4_INODE_EOFBLOCKS); 4722 EXT4_INODE_EOFBLOCKS);
4723 } 4723 }
4724 ext4_mark_inode_dirty(handle, inode); 4724 ext4_mark_inode_dirty(handle, inode);
4725 ext4_update_inode_fsync_trans(handle, inode, 1);
4725 ret2 = ext4_journal_stop(handle); 4726 ret2 = ext4_journal_stop(handle);
4726 if (ret2) 4727 if (ret2)
4727 break; 4728 break;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b4267d72f249..b32cf263750d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -816,6 +816,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
816#ifdef CONFIG_EXT4_FS_POSIX_ACL 816#ifdef CONFIG_EXT4_FS_POSIX_ACL
817 struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT); 817 struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT);
818 818
819 if (IS_ERR(p))
820 return ERR_CAST(p);
819 if (p) { 821 if (p) {
820 int acl_size = p->a_count * sizeof(ext4_acl_entry); 822 int acl_size = p->a_count * sizeof(ext4_acl_entry);
821 823
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7df2c5644e59..534a9130f625 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -149,6 +149,15 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
149 */ 149 */
150int ext4_inode_is_fast_symlink(struct inode *inode) 150int ext4_inode_is_fast_symlink(struct inode *inode)
151{ 151{
152 if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
153 int ea_blocks = EXT4_I(inode)->i_file_acl ?
154 EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
155
156 if (ext4_has_inline_data(inode))
157 return 0;
158
159 return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
160 }
152 return S_ISLNK(inode->i_mode) && inode->i_size && 161 return S_ISLNK(inode->i_mode) && inode->i_size &&
153 (inode->i_size < EXT4_N_BLOCKS * 4); 162 (inode->i_size < EXT4_N_BLOCKS * 4);
154} 163}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 798b3ac680db..e750d68fbcb5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1399,6 +1399,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1399 "falling back\n")); 1399 "falling back\n"));
1400 } 1400 }
1401 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); 1401 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1402 if (!nblocks) {
1403 ret = NULL;
1404 goto cleanup_and_exit;
1405 }
1402 start = EXT4_I(dir)->i_dir_start_lookup; 1406 start = EXT4_I(dir)->i_dir_start_lookup;
1403 if (start >= nblocks) 1407 if (start >= nblocks)
1404 start = 0; 1408 start = 0;
diff --git a/fs/namespace.c b/fs/namespace.c
index e158ec6b527b..9d1374ab6e06 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2826,6 +2826,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
2826 SB_DIRSYNC | 2826 SB_DIRSYNC |
2827 SB_SILENT | 2827 SB_SILENT |
2828 SB_POSIXACL | 2828 SB_POSIXACL |
2829 SB_LAZYTIME |
2829 SB_I_VERSION); 2830 SB_I_VERSION);
2830 2831
2831 if (flags & MS_REMOUNT) 2832 if (flags & MS_REMOUNT)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0ac2fb1c6b63..b9129e2befea 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -291,12 +291,23 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
291 const struct sockaddr *sap = data->addr; 291 const struct sockaddr *sap = data->addr;
292 struct nfs_net *nn = net_generic(data->net, nfs_net_id); 292 struct nfs_net *nn = net_generic(data->net, nfs_net_id);
293 293
294again:
294 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { 295 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
295 const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; 296 const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
296 /* Don't match clients that failed to initialise properly */ 297 /* Don't match clients that failed to initialise properly */
297 if (clp->cl_cons_state < 0) 298 if (clp->cl_cons_state < 0)
298 continue; 299 continue;
299 300
301 /* If a client is still initializing then we need to wait */
302 if (clp->cl_cons_state > NFS_CS_READY) {
303 refcount_inc(&clp->cl_count);
304 spin_unlock(&nn->nfs_client_lock);
305 nfs_wait_client_init_complete(clp);
306 nfs_put_client(clp);
307 spin_lock(&nn->nfs_client_lock);
308 goto again;
309 }
310
300 /* Different NFS versions cannot share the same nfs_client */ 311 /* Different NFS versions cannot share the same nfs_client */
301 if (clp->rpc_ops != data->nfs_mod->rpc_ops) 312 if (clp->rpc_ops != data->nfs_mod->rpc_ops)
302 continue; 313 continue;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 12bbab0becb4..65a7e5da508c 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -404,15 +404,19 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
404 if (error < 0) 404 if (error < 0)
405 goto error; 405 goto error;
406 406
407 if (!nfs4_has_session(clp))
408 nfs_mark_client_ready(clp, NFS_CS_READY);
409
410 error = nfs4_discover_server_trunking(clp, &old); 407 error = nfs4_discover_server_trunking(clp, &old);
411 if (error < 0) 408 if (error < 0)
412 goto error; 409 goto error;
413 410
414 if (clp != old) 411 if (clp != old) {
415 clp->cl_preserve_clid = true; 412 clp->cl_preserve_clid = true;
413 /*
414 * Mark the client as having failed initialization so other
415 * processes walking the nfs_client_list in nfs_match_client()
416 * won't try to use it.
417 */
418 nfs_mark_client_ready(clp, -EPERM);
419 }
416 nfs_put_client(clp); 420 nfs_put_client(clp);
417 clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags); 421 clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
418 return old; 422 return old;
@@ -539,6 +543,9 @@ int nfs40_walk_client_list(struct nfs_client *new,
539 spin_lock(&nn->nfs_client_lock); 543 spin_lock(&nn->nfs_client_lock);
540 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { 544 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
541 545
546 if (pos == new)
547 goto found;
548
542 status = nfs4_match_client(pos, new, &prev, nn); 549 status = nfs4_match_client(pos, new, &prev, nn);
543 if (status < 0) 550 if (status < 0)
544 goto out_unlock; 551 goto out_unlock;
@@ -559,6 +566,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
559 * way that a SETCLIENTID_CONFIRM to pos can succeed is 566 * way that a SETCLIENTID_CONFIRM to pos can succeed is
560 * if new and pos point to the same server: 567 * if new and pos point to the same server:
561 */ 568 */
569found:
562 refcount_inc(&pos->cl_count); 570 refcount_inc(&pos->cl_count);
563 spin_unlock(&nn->nfs_client_lock); 571 spin_unlock(&nn->nfs_client_lock);
564 572
@@ -572,6 +580,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
572 case 0: 580 case 0:
573 nfs4_swap_callback_idents(pos, new); 581 nfs4_swap_callback_idents(pos, new);
574 pos->cl_confirm = new->cl_confirm; 582 pos->cl_confirm = new->cl_confirm;
583 nfs_mark_client_ready(pos, NFS_CS_READY);
575 584
576 prev = NULL; 585 prev = NULL;
577 *result = pos; 586 *result = pos;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5b5f464f6f2a..4a379d7918f2 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1890,6 +1890,8 @@ int nfs_commit_inode(struct inode *inode, int how)
1890 if (res) 1890 if (res)
1891 error = nfs_generic_commit_list(inode, &head, how, &cinfo); 1891 error = nfs_generic_commit_list(inode, &head, how, &cinfo);
1892 nfs_commit_end(cinfo.mds); 1892 nfs_commit_end(cinfo.mds);
1893 if (res == 0)
1894 return res;
1893 if (error < 0) 1895 if (error < 0)
1894 goto out_error; 1896 goto out_error;
1895 if (!may_wait) 1897 if (!may_wait)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 697f8ae7792d..f650e475d8f0 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -60,6 +60,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
60 gi->gid[i] = exp->ex_anon_gid; 60 gi->gid[i] = exp->ex_anon_gid;
61 else 61 else
62 gi->gid[i] = rqgi->gid[i]; 62 gi->gid[i] = rqgi->gid[i];
63
64 /* Each thread allocates its own gi, no race */
65 groups_sort(gi);
63 } 66 }
64 } else { 67 } else {
65 gi = get_group_info(rqgi); 68 gi = get_group_info(rqgi);
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index cbfc196e5dc5..5ac415466861 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -24,6 +24,16 @@ config OVERLAY_FS_REDIRECT_DIR
24 an overlay which has redirects on a kernel that doesn't support this 24 an overlay which has redirects on a kernel that doesn't support this
25 feature will have unexpected results. 25 feature will have unexpected results.
26 26
27config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
28 bool "Overlayfs: follow redirects even if redirects are turned off"
29 default y
30 depends on OVERLAY_FS
31 help
32 Disable this to get a possibly more secure configuration, but that
33 might not be backward compatible with previous kernels.
34
35 For more information, see Documentation/filesystems/overlayfs.txt
36
27config OVERLAY_FS_INDEX 37config OVERLAY_FS_INDEX
28 bool "Overlayfs: turn on inodes index feature by default" 38 bool "Overlayfs: turn on inodes index feature by default"
29 depends on OVERLAY_FS 39 depends on OVERLAY_FS
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index e13921824c70..f9788bc116a8 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -887,7 +887,8 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
887 spin_unlock(&dentry->d_lock); 887 spin_unlock(&dentry->d_lock);
888 } else { 888 } else {
889 kfree(redirect); 889 kfree(redirect);
890 pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); 890 pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n",
891 err);
891 /* Fall back to userspace copy-up */ 892 /* Fall back to userspace copy-up */
892 err = -EXDEV; 893 err = -EXDEV;
893 } 894 }
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 625ed8066570..beb945e1963c 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -435,7 +435,7 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
435 435
436 /* Check if index is orphan and don't warn before cleaning it */ 436 /* Check if index is orphan and don't warn before cleaning it */
437 if (d_inode(index)->i_nlink == 1 && 437 if (d_inode(index)->i_nlink == 1 &&
438 ovl_get_nlink(index, origin.dentry, 0) == 0) 438 ovl_get_nlink(origin.dentry, index, 0) == 0)
439 err = -ENOENT; 439 err = -ENOENT;
440 440
441 dput(origin.dentry); 441 dput(origin.dentry);
@@ -681,6 +681,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
681 if (d.stop) 681 if (d.stop)
682 break; 682 break;
683 683
684 /*
685 * Following redirects can have security consequences: it's like
686 * a symlink into the lower layer without the permission checks.
687 * This is only a problem if the upper layer is untrusted (e.g
688 * comes from an USB drive). This can allow a non-readable file
689 * or directory to become readable.
690 *
691 * Only following redirects when redirects are enabled disables
692 * this attack vector when not necessary.
693 */
694 err = -EPERM;
695 if (d.redirect && !ofs->config.redirect_follow) {
696 pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry);
697 goto out_put;
698 }
699
684 if (d.redirect && d.redirect[0] == '/' && poe != roe) { 700 if (d.redirect && d.redirect[0] == '/' && poe != roe) {
685 poe = roe; 701 poe = roe;
686 702
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 13eab09a6b6f..b489099ccd49 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -180,7 +180,7 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
180static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode) 180static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
181{ 181{
182 struct dentry *ret = vfs_tmpfile(dentry, mode, 0); 182 struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
183 int err = IS_ERR(ret) ? PTR_ERR(ret) : 0; 183 int err = PTR_ERR_OR_ZERO(ret);
184 184
185 pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err); 185 pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
186 return ret; 186 return ret;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 752bab645879..9d0bc03bf6e4 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -14,6 +14,8 @@ struct ovl_config {
14 char *workdir; 14 char *workdir;
15 bool default_permissions; 15 bool default_permissions;
16 bool redirect_dir; 16 bool redirect_dir;
17 bool redirect_follow;
18 const char *redirect_mode;
17 bool index; 19 bool index;
18}; 20};
19 21
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 0daa4354fec4..8c98578d27a1 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -499,7 +499,7 @@ out:
499 return err; 499 return err;
500 500
501fail: 501fail:
502 pr_warn_ratelimited("overlay: failed to look up (%s) for ino (%i)\n", 502 pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n",
503 p->name, err); 503 p->name, err);
504 goto out; 504 goto out;
505} 505}
@@ -663,7 +663,10 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
663 return PTR_ERR(rdt.cache); 663 return PTR_ERR(rdt.cache);
664 } 664 }
665 665
666 return iterate_dir(od->realfile, &rdt.ctx); 666 err = iterate_dir(od->realfile, &rdt.ctx);
667 ctx->pos = rdt.ctx.pos;
668
669 return err;
667} 670}
668 671
669 672
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 288d20f9a55a..76440feb79f6 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -33,6 +33,13 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
33MODULE_PARM_DESC(ovl_redirect_dir_def, 33MODULE_PARM_DESC(ovl_redirect_dir_def,
34 "Default to on or off for the redirect_dir feature"); 34 "Default to on or off for the redirect_dir feature");
35 35
36static bool ovl_redirect_always_follow =
37 IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
38module_param_named(redirect_always_follow, ovl_redirect_always_follow,
39 bool, 0644);
40MODULE_PARM_DESC(ovl_redirect_always_follow,
41 "Follow redirects even if redirect_dir feature is turned off");
42
36static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); 43static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
37module_param_named(index, ovl_index_def, bool, 0644); 44module_param_named(index, ovl_index_def, bool, 0644);
38MODULE_PARM_DESC(ovl_index_def, 45MODULE_PARM_DESC(ovl_index_def,
@@ -232,6 +239,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
232 kfree(ofs->config.lowerdir); 239 kfree(ofs->config.lowerdir);
233 kfree(ofs->config.upperdir); 240 kfree(ofs->config.upperdir);
234 kfree(ofs->config.workdir); 241 kfree(ofs->config.workdir);
242 kfree(ofs->config.redirect_mode);
235 if (ofs->creator_cred) 243 if (ofs->creator_cred)
236 put_cred(ofs->creator_cred); 244 put_cred(ofs->creator_cred);
237 kfree(ofs); 245 kfree(ofs);
@@ -244,6 +252,7 @@ static void ovl_put_super(struct super_block *sb)
244 ovl_free_fs(ofs); 252 ovl_free_fs(ofs);
245} 253}
246 254
255/* Sync real dirty inodes in upper filesystem (if it exists) */
247static int ovl_sync_fs(struct super_block *sb, int wait) 256static int ovl_sync_fs(struct super_block *sb, int wait)
248{ 257{
249 struct ovl_fs *ofs = sb->s_fs_info; 258 struct ovl_fs *ofs = sb->s_fs_info;
@@ -252,14 +261,24 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
252 261
253 if (!ofs->upper_mnt) 262 if (!ofs->upper_mnt)
254 return 0; 263 return 0;
255 upper_sb = ofs->upper_mnt->mnt_sb; 264
256 if (!upper_sb->s_op->sync_fs) 265 /*
266 * If this is a sync(2) call or an emergency sync, all the super blocks
267 * will be iterated, including upper_sb, so no need to do anything.
268 *
269 * If this is a syncfs(2) call, then we do need to call
270 * sync_filesystem() on upper_sb, but enough if we do it when being
271 * called with wait == 1.
272 */
273 if (!wait)
257 return 0; 274 return 0;
258 275
259 /* real inodes have already been synced by sync_filesystem(ovl_sb) */ 276 upper_sb = ofs->upper_mnt->mnt_sb;
277
260 down_read(&upper_sb->s_umount); 278 down_read(&upper_sb->s_umount);
261 ret = upper_sb->s_op->sync_fs(upper_sb, wait); 279 ret = sync_filesystem(upper_sb);
262 up_read(&upper_sb->s_umount); 280 up_read(&upper_sb->s_umount);
281
263 return ret; 282 return ret;
264} 283}
265 284
@@ -295,6 +314,11 @@ static bool ovl_force_readonly(struct ovl_fs *ofs)
295 return (!ofs->upper_mnt || !ofs->workdir); 314 return (!ofs->upper_mnt || !ofs->workdir);
296} 315}
297 316
317static const char *ovl_redirect_mode_def(void)
318{
319 return ovl_redirect_dir_def ? "on" : "off";
320}
321
298/** 322/**
299 * ovl_show_options 323 * ovl_show_options
300 * 324 *
@@ -313,12 +337,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
313 } 337 }
314 if (ofs->config.default_permissions) 338 if (ofs->config.default_permissions)
315 seq_puts(m, ",default_permissions"); 339 seq_puts(m, ",default_permissions");
316 if (ofs->config.redirect_dir != ovl_redirect_dir_def) 340 if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
317 seq_printf(m, ",redirect_dir=%s", 341 seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
318 ofs->config.redirect_dir ? "on" : "off");
319 if (ofs->config.index != ovl_index_def) 342 if (ofs->config.index != ovl_index_def)
320 seq_printf(m, ",index=%s", 343 seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
321 ofs->config.index ? "on" : "off");
322 return 0; 344 return 0;
323} 345}
324 346
@@ -348,8 +370,7 @@ enum {
348 OPT_UPPERDIR, 370 OPT_UPPERDIR,
349 OPT_WORKDIR, 371 OPT_WORKDIR,
350 OPT_DEFAULT_PERMISSIONS, 372 OPT_DEFAULT_PERMISSIONS,
351 OPT_REDIRECT_DIR_ON, 373 OPT_REDIRECT_DIR,
352 OPT_REDIRECT_DIR_OFF,
353 OPT_INDEX_ON, 374 OPT_INDEX_ON,
354 OPT_INDEX_OFF, 375 OPT_INDEX_OFF,
355 OPT_ERR, 376 OPT_ERR,
@@ -360,8 +381,7 @@ static const match_table_t ovl_tokens = {
360 {OPT_UPPERDIR, "upperdir=%s"}, 381 {OPT_UPPERDIR, "upperdir=%s"},
361 {OPT_WORKDIR, "workdir=%s"}, 382 {OPT_WORKDIR, "workdir=%s"},
362 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 383 {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
363 {OPT_REDIRECT_DIR_ON, "redirect_dir=on"}, 384 {OPT_REDIRECT_DIR, "redirect_dir=%s"},
364 {OPT_REDIRECT_DIR_OFF, "redirect_dir=off"},
365 {OPT_INDEX_ON, "index=on"}, 385 {OPT_INDEX_ON, "index=on"},
366 {OPT_INDEX_OFF, "index=off"}, 386 {OPT_INDEX_OFF, "index=off"},
367 {OPT_ERR, NULL} 387 {OPT_ERR, NULL}
@@ -390,10 +410,37 @@ static char *ovl_next_opt(char **s)
390 return sbegin; 410 return sbegin;
391} 411}
392 412
413static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
414{
415 if (strcmp(mode, "on") == 0) {
416 config->redirect_dir = true;
417 /*
418 * Does not make sense to have redirect creation without
419 * redirect following.
420 */
421 config->redirect_follow = true;
422 } else if (strcmp(mode, "follow") == 0) {
423 config->redirect_follow = true;
424 } else if (strcmp(mode, "off") == 0) {
425 if (ovl_redirect_always_follow)
426 config->redirect_follow = true;
427 } else if (strcmp(mode, "nofollow") != 0) {
428 pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
429 mode);
430 return -EINVAL;
431 }
432
433 return 0;
434}
435
393static int ovl_parse_opt(char *opt, struct ovl_config *config) 436static int ovl_parse_opt(char *opt, struct ovl_config *config)
394{ 437{
395 char *p; 438 char *p;
396 439
440 config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
441 if (!config->redirect_mode)
442 return -ENOMEM;
443
397 while ((p = ovl_next_opt(&opt)) != NULL) { 444 while ((p = ovl_next_opt(&opt)) != NULL) {
398 int token; 445 int token;
399 substring_t args[MAX_OPT_ARGS]; 446 substring_t args[MAX_OPT_ARGS];
@@ -428,12 +475,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
428 config->default_permissions = true; 475 config->default_permissions = true;
429 break; 476 break;
430 477
431 case OPT_REDIRECT_DIR_ON: 478 case OPT_REDIRECT_DIR:
432 config->redirect_dir = true; 479 kfree(config->redirect_mode);
433 break; 480 config->redirect_mode = match_strdup(&args[0]);
434 481 if (!config->redirect_mode)
435 case OPT_REDIRECT_DIR_OFF: 482 return -ENOMEM;
436 config->redirect_dir = false;
437 break; 483 break;
438 484
439 case OPT_INDEX_ON: 485 case OPT_INDEX_ON:
@@ -458,7 +504,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
458 config->workdir = NULL; 504 config->workdir = NULL;
459 } 505 }
460 506
461 return 0; 507 return ovl_parse_redirect_mode(config, config->redirect_mode);
462} 508}
463 509
464#define OVL_WORKDIR_NAME "work" 510#define OVL_WORKDIR_NAME "work"
@@ -1160,7 +1206,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1160 if (!cred) 1206 if (!cred)
1161 goto out_err; 1207 goto out_err;
1162 1208
1163 ofs->config.redirect_dir = ovl_redirect_dir_def;
1164 ofs->config.index = ovl_index_def; 1209 ofs->config.index = ovl_index_def;
1165 err = ovl_parse_opt((char *) data, &ofs->config); 1210 err = ovl_parse_opt((char *) data, &ofs->config);
1166 if (err) 1211 if (err)
diff --git a/fs/super.c b/fs/super.c
index d4e33e8f1e6f..7ff1349609e4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -191,6 +191,24 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
191 191
192 INIT_LIST_HEAD(&s->s_mounts); 192 INIT_LIST_HEAD(&s->s_mounts);
193 s->s_user_ns = get_user_ns(user_ns); 193 s->s_user_ns = get_user_ns(user_ns);
194 init_rwsem(&s->s_umount);
195 lockdep_set_class(&s->s_umount, &type->s_umount_key);
196 /*
197 * sget() can have s_umount recursion.
198 *
199 * When it cannot find a suitable sb, it allocates a new
200 * one (this one), and tries again to find a suitable old
201 * one.
202 *
203 * In case that succeeds, it will acquire the s_umount
204 * lock of the old one. Since these are clearly distrinct
205 * locks, and this object isn't exposed yet, there's no
206 * risk of deadlocks.
207 *
208 * Annotate this by putting this lock in a different
209 * subclass.
210 */
211 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
194 212
195 if (security_sb_alloc(s)) 213 if (security_sb_alloc(s))
196 goto fail; 214 goto fail;
@@ -218,25 +236,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
218 goto fail; 236 goto fail;
219 if (list_lru_init_memcg(&s->s_inode_lru)) 237 if (list_lru_init_memcg(&s->s_inode_lru))
220 goto fail; 238 goto fail;
221
222 init_rwsem(&s->s_umount);
223 lockdep_set_class(&s->s_umount, &type->s_umount_key);
224 /*
225 * sget() can have s_umount recursion.
226 *
227 * When it cannot find a suitable sb, it allocates a new
228 * one (this one), and tries again to find a suitable old
229 * one.
230 *
231 * In case that succeeds, it will acquire the s_umount
232 * lock of the old one. Since these are clearly distrinct
233 * locks, and this object isn't exposed yet, there's no
234 * risk of deadlocks.
235 *
236 * Annotate this by putting this lock in a different
237 * subclass.
238 */
239 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
240 s->s_count = 1; 239 s->s_count = 1;
241 atomic_set(&s->s_active, 1); 240 atomic_set(&s->s_active, 1);
242 mutex_init(&s->s_vfs_rename_mutex); 241 mutex_init(&s->s_vfs_rename_mutex);
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 0da80019a917..83ed7715f856 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -702,7 +702,7 @@ xfs_alloc_ag_vextent(
702 ASSERT(args->agbno % args->alignment == 0); 702 ASSERT(args->agbno % args->alignment == 0);
703 703
704 /* if not file data, insert new block into the reverse map btree */ 704 /* if not file data, insert new block into the reverse map btree */
705 if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { 705 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
706 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, 706 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
707 args->agbno, args->len, &args->oinfo); 707 args->agbno, args->len, &args->oinfo);
708 if (error) 708 if (error)
@@ -1682,7 +1682,7 @@ xfs_free_ag_extent(
1682 bno_cur = cnt_cur = NULL; 1682 bno_cur = cnt_cur = NULL;
1683 mp = tp->t_mountp; 1683 mp = tp->t_mountp;
1684 1684
1685 if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { 1685 if (!xfs_rmap_should_skip_owner_update(oinfo)) {
1686 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); 1686 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1687 if (error) 1687 if (error)
1688 goto error0; 1688 goto error0;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 6249c92671de..a76914db72ef 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -212,6 +212,7 @@ xfs_attr_set(
212 int flags) 212 int flags)
213{ 213{
214 struct xfs_mount *mp = dp->i_mount; 214 struct xfs_mount *mp = dp->i_mount;
215 struct xfs_buf *leaf_bp = NULL;
215 struct xfs_da_args args; 216 struct xfs_da_args args;
216 struct xfs_defer_ops dfops; 217 struct xfs_defer_ops dfops;
217 struct xfs_trans_res tres; 218 struct xfs_trans_res tres;
@@ -327,9 +328,16 @@ xfs_attr_set(
327 * GROT: another possible req'mt for a double-split btree op. 328 * GROT: another possible req'mt for a double-split btree op.
328 */ 329 */
329 xfs_defer_init(args.dfops, args.firstblock); 330 xfs_defer_init(args.dfops, args.firstblock);
330 error = xfs_attr_shortform_to_leaf(&args); 331 error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
331 if (error) 332 if (error)
332 goto out_defer_cancel; 333 goto out_defer_cancel;
334 /*
335 * Prevent the leaf buffer from being unlocked so that a
336 * concurrent AIL push cannot grab the half-baked leaf
337 * buffer and run into problems with the write verifier.
338 */
339 xfs_trans_bhold(args.trans, leaf_bp);
340 xfs_defer_bjoin(args.dfops, leaf_bp);
333 xfs_defer_ijoin(args.dfops, dp); 341 xfs_defer_ijoin(args.dfops, dp);
334 error = xfs_defer_finish(&args.trans, args.dfops); 342 error = xfs_defer_finish(&args.trans, args.dfops);
335 if (error) 343 if (error)
@@ -337,13 +345,14 @@ xfs_attr_set(
337 345
338 /* 346 /*
339 * Commit the leaf transformation. We'll need another (linked) 347 * Commit the leaf transformation. We'll need another (linked)
340 * transaction to add the new attribute to the leaf. 348 * transaction to add the new attribute to the leaf, which
349 * means that we have to hold & join the leaf buffer here too.
341 */ 350 */
342
343 error = xfs_trans_roll_inode(&args.trans, dp); 351 error = xfs_trans_roll_inode(&args.trans, dp);
344 if (error) 352 if (error)
345 goto out; 353 goto out;
346 354 xfs_trans_bjoin(args.trans, leaf_bp);
355 leaf_bp = NULL;
347 } 356 }
348 357
349 if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) 358 if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -374,8 +383,9 @@ xfs_attr_set(
374 383
375out_defer_cancel: 384out_defer_cancel:
376 xfs_defer_cancel(&dfops); 385 xfs_defer_cancel(&dfops);
377 args.trans = NULL;
378out: 386out:
387 if (leaf_bp)
388 xfs_trans_brelse(args.trans, leaf_bp);
379 if (args.trans) 389 if (args.trans)
380 xfs_trans_cancel(args.trans); 390 xfs_trans_cancel(args.trans);
381 xfs_iunlock(dp, XFS_ILOCK_EXCL); 391 xfs_iunlock(dp, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 53cc8b986eac..601eaa36f1ad 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
735} 735}
736 736
737/* 737/*
738 * Convert from using the shortform to the leaf. 738 * Convert from using the shortform to the leaf. On success, return the
739 * buffer so that we can keep it locked until we're totally done with it.
739 */ 740 */
740int 741int
741xfs_attr_shortform_to_leaf(xfs_da_args_t *args) 742xfs_attr_shortform_to_leaf(
743 struct xfs_da_args *args,
744 struct xfs_buf **leaf_bp)
742{ 745{
743 xfs_inode_t *dp; 746 xfs_inode_t *dp;
744 xfs_attr_shortform_t *sf; 747 xfs_attr_shortform_t *sf;
@@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
818 sfe = XFS_ATTR_SF_NEXTENTRY(sfe); 821 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
819 } 822 }
820 error = 0; 823 error = 0;
821 824 *leaf_bp = bp;
822out: 825out:
823 kmem_free(tmpbuffer); 826 kmem_free(tmpbuffer);
824 return error; 827 return error;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index f7dda0c237b0..894124efb421 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -48,7 +48,8 @@ void xfs_attr_shortform_create(struct xfs_da_args *args);
48void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); 48void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
49int xfs_attr_shortform_lookup(struct xfs_da_args *args); 49int xfs_attr_shortform_lookup(struct xfs_da_args *args);
50int xfs_attr_shortform_getvalue(struct xfs_da_args *args); 50int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
51int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); 51int xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
52 struct xfs_buf **leaf_bp);
52int xfs_attr_shortform_remove(struct xfs_da_args *args); 53int xfs_attr_shortform_remove(struct xfs_da_args *args);
53int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); 54int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
54int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); 55int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 1210f684d3c2..1bddbba6b80c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5136,7 +5136,7 @@ __xfs_bunmapi(
5136 * blowing out the transaction with a mix of EFIs and reflink 5136 * blowing out the transaction with a mix of EFIs and reflink
5137 * adjustments. 5137 * adjustments.
5138 */ 5138 */
5139 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) 5139 if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
5140 max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); 5140 max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
5141 else 5141 else
5142 max_len = len; 5142 max_len = len;
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 072ebfe1d6ae..087fea02c389 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -249,6 +249,10 @@ xfs_defer_trans_roll(
249 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) 249 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
250 xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); 250 xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
251 251
252 /* Hold the (previously bjoin'd) buffer locked across the roll. */
253 for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
254 xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
255
252 trace_xfs_defer_trans_roll((*tp)->t_mountp, dop); 256 trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
253 257
254 /* Roll the transaction. */ 258 /* Roll the transaction. */
@@ -264,6 +268,12 @@ xfs_defer_trans_roll(
264 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) 268 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
265 xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); 269 xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
266 270
271 /* Rejoin the buffers and dirty them so the log moves forward. */
272 for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
273 xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
274 xfs_trans_bhold(*tp, dop->dop_bufs[i]);
275 }
276
267 return error; 277 return error;
268} 278}
269 279
@@ -295,6 +305,31 @@ xfs_defer_ijoin(
295 } 305 }
296 } 306 }
297 307
308 ASSERT(0);
309 return -EFSCORRUPTED;
310}
311
312/*
313 * Add this buffer to the deferred op. Each joined buffer is relogged
314 * each time we roll the transaction.
315 */
316int
317xfs_defer_bjoin(
318 struct xfs_defer_ops *dop,
319 struct xfs_buf *bp)
320{
321 int i;
322
323 for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
324 if (dop->dop_bufs[i] == bp)
325 return 0;
326 else if (dop->dop_bufs[i] == NULL) {
327 dop->dop_bufs[i] = bp;
328 return 0;
329 }
330 }
331
332 ASSERT(0);
298 return -EFSCORRUPTED; 333 return -EFSCORRUPTED;
299} 334}
300 335
@@ -493,9 +528,7 @@ xfs_defer_init(
493 struct xfs_defer_ops *dop, 528 struct xfs_defer_ops *dop,
494 xfs_fsblock_t *fbp) 529 xfs_fsblock_t *fbp)
495{ 530{
496 dop->dop_committed = false; 531 memset(dop, 0, sizeof(struct xfs_defer_ops));
497 dop->dop_low = false;
498 memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
499 *fbp = NULLFSBLOCK; 532 *fbp = NULLFSBLOCK;
500 INIT_LIST_HEAD(&dop->dop_intake); 533 INIT_LIST_HEAD(&dop->dop_intake);
501 INIT_LIST_HEAD(&dop->dop_pending); 534 INIT_LIST_HEAD(&dop->dop_pending);
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index d4f046dd44bd..045beacdd37d 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
59}; 59};
60 60
61#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ 61#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
62#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */
62 63
63struct xfs_defer_ops { 64struct xfs_defer_ops {
64 bool dop_committed; /* did any trans commit? */ 65 bool dop_committed; /* did any trans commit? */
@@ -66,8 +67,9 @@ struct xfs_defer_ops {
66 struct list_head dop_intake; /* unlogged pending work */ 67 struct list_head dop_intake; /* unlogged pending work */
67 struct list_head dop_pending; /* logged pending work */ 68 struct list_head dop_pending; /* logged pending work */
68 69
69 /* relog these inodes with each roll */ 70 /* relog these with each roll */
70 struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES]; 71 struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES];
72 struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS];
71}; 73};
72 74
73void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, 75void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);
77void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp); 79void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
78bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop); 80bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
79int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip); 81int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
82int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
80 83
81/* Description of a deferred type. */ 84/* Description of a deferred type. */
82struct xfs_defer_op_type { 85struct xfs_defer_op_type {
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index de3f04a98656..3b57ef0f2f76 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -920,8 +920,7 @@ STATIC xfs_agnumber_t
920xfs_ialloc_ag_select( 920xfs_ialloc_ag_select(
921 xfs_trans_t *tp, /* transaction pointer */ 921 xfs_trans_t *tp, /* transaction pointer */
922 xfs_ino_t parent, /* parent directory inode number */ 922 xfs_ino_t parent, /* parent directory inode number */
923 umode_t mode, /* bits set to indicate file type */ 923 umode_t mode) /* bits set to indicate file type */
924 int okalloc) /* ok to allocate more space */
925{ 924{
926 xfs_agnumber_t agcount; /* number of ag's in the filesystem */ 925 xfs_agnumber_t agcount; /* number of ag's in the filesystem */
927 xfs_agnumber_t agno; /* current ag number */ 926 xfs_agnumber_t agno; /* current ag number */
@@ -978,9 +977,6 @@ xfs_ialloc_ag_select(
978 return agno; 977 return agno;
979 } 978 }
980 979
981 if (!okalloc)
982 goto nextag;
983
984 if (!pag->pagf_init) { 980 if (!pag->pagf_init) {
985 error = xfs_alloc_pagf_init(mp, tp, agno, flags); 981 error = xfs_alloc_pagf_init(mp, tp, agno, flags);
986 if (error) 982 if (error)
@@ -1680,7 +1676,6 @@ xfs_dialloc(
1680 struct xfs_trans *tp, 1676 struct xfs_trans *tp,
1681 xfs_ino_t parent, 1677 xfs_ino_t parent,
1682 umode_t mode, 1678 umode_t mode,
1683 int okalloc,
1684 struct xfs_buf **IO_agbp, 1679 struct xfs_buf **IO_agbp,
1685 xfs_ino_t *inop) 1680 xfs_ino_t *inop)
1686{ 1681{
@@ -1692,6 +1687,7 @@ xfs_dialloc(
1692 int noroom = 0; 1687 int noroom = 0;
1693 xfs_agnumber_t start_agno; 1688 xfs_agnumber_t start_agno;
1694 struct xfs_perag *pag; 1689 struct xfs_perag *pag;
1690 int okalloc = 1;
1695 1691
1696 if (*IO_agbp) { 1692 if (*IO_agbp) {
1697 /* 1693 /*
@@ -1707,7 +1703,7 @@ xfs_dialloc(
1707 * We do not have an agbp, so select an initial allocation 1703 * We do not have an agbp, so select an initial allocation
1708 * group for inode allocation. 1704 * group for inode allocation.
1709 */ 1705 */
1710 start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); 1706 start_agno = xfs_ialloc_ag_select(tp, parent, mode);
1711 if (start_agno == NULLAGNUMBER) { 1707 if (start_agno == NULLAGNUMBER) {
1712 *inop = NULLFSINO; 1708 *inop = NULLFSINO;
1713 return 0; 1709 return 0;
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index d2bdcd5e7312..66a8de0b1caa 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -81,7 +81,6 @@ xfs_dialloc(
81 struct xfs_trans *tp, /* transaction pointer */ 81 struct xfs_trans *tp, /* transaction pointer */
82 xfs_ino_t parent, /* parent inode (directory) */ 82 xfs_ino_t parent, /* parent inode (directory) */
83 umode_t mode, /* mode bits for new inode */ 83 umode_t mode, /* mode bits for new inode */
84 int okalloc, /* ok to allocate more space */
85 struct xfs_buf **agbp, /* buf for a.g. inode header */ 84 struct xfs_buf **agbp, /* buf for a.g. inode header */
86 xfs_ino_t *inop); /* inode number allocated */ 85 xfs_ino_t *inop); /* inode number allocated */
87 86
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 89bf16b4d937..b0f31791c7e6 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -632,8 +632,6 @@ xfs_iext_insert(
632 struct xfs_iext_leaf *new = NULL; 632 struct xfs_iext_leaf *new = NULL;
633 int nr_entries, i; 633 int nr_entries, i;
634 634
635 trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
636
637 if (ifp->if_height == 0) 635 if (ifp->if_height == 0)
638 xfs_iext_alloc_root(ifp, cur); 636 xfs_iext_alloc_root(ifp, cur);
639 else if (ifp->if_height == 1) 637 else if (ifp->if_height == 1)
@@ -661,6 +659,8 @@ xfs_iext_insert(
661 xfs_iext_set(cur_rec(cur), irec); 659 xfs_iext_set(cur_rec(cur), irec);
662 ifp->if_bytes += sizeof(struct xfs_iext_rec); 660 ifp->if_bytes += sizeof(struct xfs_iext_rec);
663 661
662 trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
663
664 if (new) 664 if (new)
665 xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); 665 xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
666} 666}
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 585b35d34142..c40d26763075 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc(
1488 xfs_extlen_t aglen, 1488 xfs_extlen_t aglen,
1489 struct xfs_defer_ops *dfops) 1489 struct xfs_defer_ops *dfops)
1490{ 1490{
1491 int error;
1492
1493 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, 1491 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
1494 agbno, aglen); 1492 agbno, aglen);
1495 1493
1496 /* Add refcount btree reservation */ 1494 /* Add refcount btree reservation */
1497 error = xfs_refcount_adjust_cow(rcur, agbno, aglen, 1495 return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1498 XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); 1496 XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
1499 if (error)
1500 return error;
1501
1502 /* Add rmap entry */
1503 if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
1504 error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
1505 rcur->bc_private.a.agno,
1506 agbno, aglen, XFS_RMAP_OWN_COW);
1507 if (error)
1508 return error;
1509 }
1510
1511 return error;
1512} 1497}
1513 1498
1514/* 1499/*
@@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free(
1521 xfs_extlen_t aglen, 1506 xfs_extlen_t aglen,
1522 struct xfs_defer_ops *dfops) 1507 struct xfs_defer_ops *dfops)
1523{ 1508{
1524 int error;
1525
1526 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, 1509 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
1527 agbno, aglen); 1510 agbno, aglen);
1528 1511
1529 /* Remove refcount btree reservation */ 1512 /* Remove refcount btree reservation */
1530 error = xfs_refcount_adjust_cow(rcur, agbno, aglen, 1513 return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1531 XFS_REFCOUNT_ADJUST_COW_FREE, dfops); 1514 XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
1532 if (error)
1533 return error;
1534
1535 /* Remove rmap entry */
1536 if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
1537 error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
1538 rcur->bc_private.a.agno,
1539 agbno, aglen, XFS_RMAP_OWN_COW);
1540 if (error)
1541 return error;
1542 }
1543
1544 return error;
1545} 1515}
1546 1516
1547/* Record a CoW staging extent in the refcount btree. */ 1517/* Record a CoW staging extent in the refcount btree. */
@@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent(
1552 xfs_fsblock_t fsb, 1522 xfs_fsblock_t fsb,
1553 xfs_extlen_t len) 1523 xfs_extlen_t len)
1554{ 1524{
1525 int error;
1526
1555 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1527 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1556 return 0; 1528 return 0;
1557 1529
1558 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, 1530 error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
1559 fsb, len); 1531 fsb, len);
1532 if (error)
1533 return error;
1534
1535 /* Add rmap entry */
1536 return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
1537 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1560} 1538}
1561 1539
1562/* Forget a CoW staging event in the refcount btree. */ 1540/* Forget a CoW staging event in the refcount btree. */
@@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent(
1567 xfs_fsblock_t fsb, 1545 xfs_fsblock_t fsb,
1568 xfs_extlen_t len) 1546 xfs_extlen_t len)
1569{ 1547{
1548 int error;
1549
1570 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1550 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1571 return 0; 1551 return 0;
1572 1552
1553 /* Remove rmap entry */
1554 error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
1555 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1556 if (error)
1557 return error;
1558
1573 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, 1559 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
1574 fsb, len); 1560 fsb, len);
1575} 1561}
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index dd019cee1b3b..50db920ceeeb 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -368,6 +368,51 @@ xfs_rmap_lookup_le_range(
368} 368}
369 369
370/* 370/*
371 * Perform all the relevant owner checks for a removal op. If we're doing an
372 * unknown-owner removal then we have no owner information to check.
373 */
374static int
375xfs_rmap_free_check_owner(
376 struct xfs_mount *mp,
377 uint64_t ltoff,
378 struct xfs_rmap_irec *rec,
379 xfs_fsblock_t bno,
380 xfs_filblks_t len,
381 uint64_t owner,
382 uint64_t offset,
383 unsigned int flags)
384{
385 int error = 0;
386
387 if (owner == XFS_RMAP_OWN_UNKNOWN)
388 return 0;
389
390 /* Make sure the unwritten flag matches. */
391 XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
392 (rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
393
394 /* Make sure the owner matches what we expect to find in the tree. */
395 XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
396
397 /* Check the offset, if necessary. */
398 if (XFS_RMAP_NON_INODE_OWNER(owner))
399 goto out;
400
401 if (flags & XFS_RMAP_BMBT_BLOCK) {
402 XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
403 out);
404 } else {
405 XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
406 XFS_WANT_CORRUPTED_GOTO(mp,
407 ltoff + rec->rm_blockcount >= offset + len,
408 out);
409 }
410
411out:
412 return error;
413}
414
415/*
371 * Find the extent in the rmap btree and remove it. 416 * Find the extent in the rmap btree and remove it.
372 * 417 *
373 * The record we find should always be an exact match for the extent that we're 418 * The record we find should always be an exact match for the extent that we're
@@ -444,33 +489,40 @@ xfs_rmap_unmap(
444 goto out_done; 489 goto out_done;
445 } 490 }
446 491
447 /* Make sure the unwritten flag matches. */ 492 /*
448 XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == 493 * If we're doing an unknown-owner removal for EFI recovery, we expect
449 (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); 494 * to find the full range in the rmapbt or nothing at all. If we
495 * don't find any rmaps overlapping either end of the range, we're
496 * done. Hopefully this means that the EFI creator already queued
497 * (and finished) a RUI to remove the rmap.
498 */
499 if (owner == XFS_RMAP_OWN_UNKNOWN &&
500 ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
501 struct xfs_rmap_irec rtrec;
502
503 error = xfs_btree_increment(cur, 0, &i);
504 if (error)
505 goto out_error;
506 if (i == 0)
507 goto out_done;
508 error = xfs_rmap_get_rec(cur, &rtrec, &i);
509 if (error)
510 goto out_error;
511 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
512 if (rtrec.rm_startblock >= bno + len)
513 goto out_done;
514 }
450 515
451 /* Make sure the extent we found covers the entire freeing range. */ 516 /* Make sure the extent we found covers the entire freeing range. */
452 XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && 517 XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
453 ltrec.rm_startblock + ltrec.rm_blockcount >= 518 ltrec.rm_startblock + ltrec.rm_blockcount >=
454 bno + len, out_error); 519 bno + len, out_error);
455 520
456 /* Make sure the owner matches what we expect to find in the tree. */ 521 /* Check owner information. */
457 XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner || 522 error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
458 XFS_RMAP_NON_INODE_OWNER(owner), out_error); 523 offset, flags);
459 524 if (error)
460 /* Check the offset, if necessary. */ 525 goto out_error;
461 if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
462 if (flags & XFS_RMAP_BMBT_BLOCK) {
463 XFS_WANT_CORRUPTED_GOTO(mp,
464 ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
465 out_error);
466 } else {
467 XFS_WANT_CORRUPTED_GOTO(mp,
468 ltrec.rm_offset <= offset, out_error);
469 XFS_WANT_CORRUPTED_GOTO(mp,
470 ltoff + ltrec.rm_blockcount >= offset + len,
471 out_error);
472 }
473 }
474 526
475 if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { 527 if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
476 /* exact match, simply remove the record from rmap tree */ 528 /* exact match, simply remove the record from rmap tree */
@@ -664,6 +716,7 @@ xfs_rmap_map(
664 flags |= XFS_RMAP_UNWRITTEN; 716 flags |= XFS_RMAP_UNWRITTEN;
665 trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, 717 trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
666 unwritten, oinfo); 718 unwritten, oinfo);
719 ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
667 720
668 /* 721 /*
669 * For the initial lookup, look for an exact match or the left-adjacent 722 * For the initial lookup, look for an exact match or the left-adjacent
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 466ede637080..0fcd5b1ba729 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -61,7 +61,21 @@ static inline void
61xfs_rmap_skip_owner_update( 61xfs_rmap_skip_owner_update(
62 struct xfs_owner_info *oi) 62 struct xfs_owner_info *oi)
63{ 63{
64 oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; 64 xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
65}
66
67static inline bool
68xfs_rmap_should_skip_owner_update(
69 struct xfs_owner_info *oi)
70{
71 return oi->oi_owner == XFS_RMAP_OWN_NULL;
72}
73
74static inline void
75xfs_rmap_any_owner_update(
76 struct xfs_owner_info *oi)
77{
78 xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
65} 79}
66 80
67/* Reverse mapping functions. */ 81/* Reverse mapping functions. */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 9c42c4efd01e..ab3aef2ae823 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -46,7 +46,6 @@
46#include "scrub/scrub.h" 46#include "scrub/scrub.h"
47#include "scrub/common.h" 47#include "scrub/common.h"
48#include "scrub/trace.h" 48#include "scrub/trace.h"
49#include "scrub/scrub.h"
50#include "scrub/btree.h" 49#include "scrub/btree.h"
51 50
52/* 51/*
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 472080e75788..86daed0e3a45 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -26,7 +26,6 @@
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_defer.h" 27#include "xfs_defer.h"
28#include "xfs_da_format.h" 28#include "xfs_da_format.h"
29#include "xfs_defer.h"
30#include "xfs_inode.h" 29#include "xfs_inode.h"
31#include "xfs_btree.h" 30#include "xfs_btree.h"
32#include "xfs_trans.h" 31#include "xfs_trans.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 44f8c5451210..64da90655e95 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -538,7 +538,7 @@ xfs_efi_recover(
538 return error; 538 return error;
539 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); 539 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
540 540
541 xfs_rmap_skip_owner_update(&oinfo); 541 xfs_rmap_any_owner_update(&oinfo);
542 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 542 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
543 extp = &efip->efi_format.efi_extents[i]; 543 extp = &efip->efi_format.efi_extents[i];
544 error = xfs_trans_free_extent(tp, efdp, extp->ext_start, 544 error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8f22fc579dbb..60a2e128cb6a 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -571,6 +571,11 @@ xfs_growfs_data_private(
571 * this doesn't actually exist in the rmap btree. 571 * this doesn't actually exist in the rmap btree.
572 */ 572 */
573 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL); 573 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
574 error = xfs_rmap_free(tp, bp, agno,
575 be32_to_cpu(agf->agf_length) - new,
576 new, &oinfo);
577 if (error)
578 goto error0;
574 error = xfs_free_extent(tp, 579 error = xfs_free_extent(tp,
575 XFS_AGB_TO_FSB(mp, agno, 580 XFS_AGB_TO_FSB(mp, agno,
576 be32_to_cpu(agf->agf_length) - new), 581 be32_to_cpu(agf->agf_length) - new),
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 43005fbe8b1e..3861d61fb265 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -870,7 +870,7 @@ xfs_eofblocks_worker(
870 * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default). 870 * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
871 * (We'll just piggyback on the post-EOF prealloc space workqueue.) 871 * (We'll just piggyback on the post-EOF prealloc space workqueue.)
872 */ 872 */
873STATIC void 873void
874xfs_queue_cowblocks( 874xfs_queue_cowblocks(
875 struct xfs_mount *mp) 875 struct xfs_mount *mp)
876{ 876{
@@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks(
1536 return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks); 1536 return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
1537} 1537}
1538 1538
1539static inline unsigned long
1540xfs_iflag_for_tag(
1541 int tag)
1542{
1543 switch (tag) {
1544 case XFS_ICI_EOFBLOCKS_TAG:
1545 return XFS_IEOFBLOCKS;
1546 case XFS_ICI_COWBLOCKS_TAG:
1547 return XFS_ICOWBLOCKS;
1548 default:
1549 ASSERT(0);
1550 return 0;
1551 }
1552}
1553
1539static void 1554static void
1540__xfs_inode_set_eofblocks_tag( 1555__xfs_inode_set_blocks_tag(
1541 xfs_inode_t *ip, 1556 xfs_inode_t *ip,
1542 void (*execute)(struct xfs_mount *mp), 1557 void (*execute)(struct xfs_mount *mp),
1543 void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, 1558 void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag(
1552 * Don't bother locking the AG and looking up in the radix trees 1567 * Don't bother locking the AG and looking up in the radix trees
1553 * if we already know that we have the tag set. 1568 * if we already know that we have the tag set.
1554 */ 1569 */
1555 if (ip->i_flags & XFS_IEOFBLOCKS) 1570 if (ip->i_flags & xfs_iflag_for_tag(tag))
1556 return; 1571 return;
1557 spin_lock(&ip->i_flags_lock); 1572 spin_lock(&ip->i_flags_lock);
1558 ip->i_flags |= XFS_IEOFBLOCKS; 1573 ip->i_flags |= xfs_iflag_for_tag(tag);
1559 spin_unlock(&ip->i_flags_lock); 1574 spin_unlock(&ip->i_flags_lock);
1560 1575
1561 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 1576 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag(
1587 xfs_inode_t *ip) 1602 xfs_inode_t *ip)
1588{ 1603{
1589 trace_xfs_inode_set_eofblocks_tag(ip); 1604 trace_xfs_inode_set_eofblocks_tag(ip);
1590 return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks, 1605 return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,
1591 trace_xfs_perag_set_eofblocks, 1606 trace_xfs_perag_set_eofblocks,
1592 XFS_ICI_EOFBLOCKS_TAG); 1607 XFS_ICI_EOFBLOCKS_TAG);
1593} 1608}
1594 1609
1595static void 1610static void
1596__xfs_inode_clear_eofblocks_tag( 1611__xfs_inode_clear_blocks_tag(
1597 xfs_inode_t *ip, 1612 xfs_inode_t *ip,
1598 void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, 1613 void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
1599 int error, unsigned long caller_ip), 1614 int error, unsigned long caller_ip),
@@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag(
1603 struct xfs_perag *pag; 1618 struct xfs_perag *pag;
1604 1619
1605 spin_lock(&ip->i_flags_lock); 1620 spin_lock(&ip->i_flags_lock);
1606 ip->i_flags &= ~XFS_IEOFBLOCKS; 1621 ip->i_flags &= ~xfs_iflag_for_tag(tag);
1607 spin_unlock(&ip->i_flags_lock); 1622 spin_unlock(&ip->i_flags_lock);
1608 1623
1609 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 1624 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag(
1630 xfs_inode_t *ip) 1645 xfs_inode_t *ip)
1631{ 1646{
1632 trace_xfs_inode_clear_eofblocks_tag(ip); 1647 trace_xfs_inode_clear_eofblocks_tag(ip);
1633 return __xfs_inode_clear_eofblocks_tag(ip, 1648 return __xfs_inode_clear_blocks_tag(ip,
1634 trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG); 1649 trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
1635} 1650}
1636 1651
@@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag(
1724 xfs_inode_t *ip) 1739 xfs_inode_t *ip)
1725{ 1740{
1726 trace_xfs_inode_set_cowblocks_tag(ip); 1741 trace_xfs_inode_set_cowblocks_tag(ip);
1727 return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, 1742 return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,
1728 trace_xfs_perag_set_cowblocks, 1743 trace_xfs_perag_set_cowblocks,
1729 XFS_ICI_COWBLOCKS_TAG); 1744 XFS_ICI_COWBLOCKS_TAG);
1730} 1745}
@@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag(
1734 xfs_inode_t *ip) 1749 xfs_inode_t *ip)
1735{ 1750{
1736 trace_xfs_inode_clear_cowblocks_tag(ip); 1751 trace_xfs_inode_clear_cowblocks_tag(ip);
1737 return __xfs_inode_clear_eofblocks_tag(ip, 1752 return __xfs_inode_clear_blocks_tag(ip,
1738 trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG); 1753 trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
1739} 1754}
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index bff4d85e5498..d4a77588eca1 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
81int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *); 81int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);
82int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip); 82int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
83void xfs_cowblocks_worker(struct work_struct *); 83void xfs_cowblocks_worker(struct work_struct *);
84void xfs_queue_cowblocks(struct xfs_mount *);
84 85
85int xfs_inode_ag_iterator(struct xfs_mount *mp, 86int xfs_inode_ag_iterator(struct xfs_mount *mp,
86 int (*execute)(struct xfs_inode *ip, int flags, void *args), 87 int (*execute)(struct xfs_inode *ip, int flags, void *args),
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 801274126648..6f95bdb408ce 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -749,7 +749,6 @@ xfs_ialloc(
749 xfs_nlink_t nlink, 749 xfs_nlink_t nlink,
750 dev_t rdev, 750 dev_t rdev,
751 prid_t prid, 751 prid_t prid,
752 int okalloc,
753 xfs_buf_t **ialloc_context, 752 xfs_buf_t **ialloc_context,
754 xfs_inode_t **ipp) 753 xfs_inode_t **ipp)
755{ 754{
@@ -765,7 +764,7 @@ xfs_ialloc(
765 * Call the space management code to pick 764 * Call the space management code to pick
766 * the on-disk inode to be allocated. 765 * the on-disk inode to be allocated.
767 */ 766 */
768 error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, 767 error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
769 ialloc_context, &ino); 768 ialloc_context, &ino);
770 if (error) 769 if (error)
771 return error; 770 return error;
@@ -957,7 +956,6 @@ xfs_dir_ialloc(
957 xfs_nlink_t nlink, 956 xfs_nlink_t nlink,
958 dev_t rdev, 957 dev_t rdev,
959 prid_t prid, /* project id */ 958 prid_t prid, /* project id */
960 int okalloc, /* ok to allocate new space */
961 xfs_inode_t **ipp, /* pointer to inode; it will be 959 xfs_inode_t **ipp, /* pointer to inode; it will be
962 locked. */ 960 locked. */
963 int *committed) 961 int *committed)
@@ -988,8 +986,8 @@ xfs_dir_ialloc(
988 * transaction commit so that no other process can steal 986 * transaction commit so that no other process can steal
989 * the inode(s) that we've just allocated. 987 * the inode(s) that we've just allocated.
990 */ 988 */
991 code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, 989 code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context,
992 &ialloc_context, &ip); 990 &ip);
993 991
994 /* 992 /*
995 * Return an error if we were unable to allocate a new inode. 993 * Return an error if we were unable to allocate a new inode.
@@ -1061,7 +1059,7 @@ xfs_dir_ialloc(
1061 * this call should always succeed. 1059 * this call should always succeed.
1062 */ 1060 */
1063 code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, 1061 code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
1064 okalloc, &ialloc_context, &ip); 1062 &ialloc_context, &ip);
1065 1063
1066 /* 1064 /*
1067 * If we get an error at this point, return to the caller 1065 * If we get an error at this point, return to the caller
@@ -1182,11 +1180,6 @@ xfs_create(
1182 xfs_flush_inodes(mp); 1180 xfs_flush_inodes(mp);
1183 error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp); 1181 error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
1184 } 1182 }
1185 if (error == -ENOSPC) {
1186 /* No space at all so try a "no-allocation" reservation */
1187 resblks = 0;
1188 error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
1189 }
1190 if (error) 1183 if (error)
1191 goto out_release_inode; 1184 goto out_release_inode;
1192 1185
@@ -1203,19 +1196,13 @@ xfs_create(
1203 if (error) 1196 if (error)
1204 goto out_trans_cancel; 1197 goto out_trans_cancel;
1205 1198
1206 if (!resblks) {
1207 error = xfs_dir_canenter(tp, dp, name);
1208 if (error)
1209 goto out_trans_cancel;
1210 }
1211
1212 /* 1199 /*
1213 * A newly created regular or special file just has one directory 1200 * A newly created regular or special file just has one directory
1214 * entry pointing to them, but a directory also the "." entry 1201 * entry pointing to them, but a directory also the "." entry
1215 * pointing to itself. 1202 * pointing to itself.
1216 */ 1203 */
1217 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, 1204 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip,
1218 prid, resblks > 0, &ip, NULL); 1205 NULL);
1219 if (error) 1206 if (error)
1220 goto out_trans_cancel; 1207 goto out_trans_cancel;
1221 1208
@@ -1340,11 +1327,6 @@ xfs_create_tmpfile(
1340 tres = &M_RES(mp)->tr_create_tmpfile; 1327 tres = &M_RES(mp)->tr_create_tmpfile;
1341 1328
1342 error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp); 1329 error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
1343 if (error == -ENOSPC) {
1344 /* No space at all so try a "no-allocation" reservation */
1345 resblks = 0;
1346 error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
1347 }
1348 if (error) 1330 if (error)
1349 goto out_release_inode; 1331 goto out_release_inode;
1350 1332
@@ -1353,8 +1335,7 @@ xfs_create_tmpfile(
1353 if (error) 1335 if (error)
1354 goto out_trans_cancel; 1336 goto out_trans_cancel;
1355 1337
1356 error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, 1338 error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL);
1357 prid, resblks > 0, &ip, NULL);
1358 if (error) 1339 if (error)
1359 goto out_trans_cancel; 1340 goto out_trans_cancel;
1360 1341
@@ -1506,6 +1487,24 @@ xfs_link(
1506 return error; 1487 return error;
1507} 1488}
1508 1489
1490/* Clear the reflink flag and the cowblocks tag if possible. */
1491static void
1492xfs_itruncate_clear_reflink_flags(
1493 struct xfs_inode *ip)
1494{
1495 struct xfs_ifork *dfork;
1496 struct xfs_ifork *cfork;
1497
1498 if (!xfs_is_reflink_inode(ip))
1499 return;
1500 dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1501 cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
1502 if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
1503 ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
1504 if (cfork->if_bytes == 0)
1505 xfs_inode_clear_cowblocks_tag(ip);
1506}
1507
1509/* 1508/*
1510 * Free up the underlying blocks past new_size. The new size must be smaller 1509 * Free up the underlying blocks past new_size. The new size must be smaller
1511 * than the current size. This routine can be used both for the attribute and 1510 * than the current size. This routine can be used both for the attribute and
@@ -1602,15 +1601,7 @@ xfs_itruncate_extents(
1602 if (error) 1601 if (error)
1603 goto out; 1602 goto out;
1604 1603
1605 /* 1604 xfs_itruncate_clear_reflink_flags(ip);
1606 * Clear the reflink flag if there are no data fork blocks and
1607 * there are no extents staged in the cow fork.
1608 */
1609 if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
1610 if (ip->i_d.di_nblocks == 0)
1611 ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
1612 xfs_inode_clear_cowblocks_tag(ip);
1613 }
1614 1605
1615 /* 1606 /*
1616 * Always re-log the inode so that our permanent transaction can keep 1607 * Always re-log the inode so that our permanent transaction can keep
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index cc13c3763721..d383e392ec9d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
232 * log recovery to replay a bmap operation on the inode. 232 * log recovery to replay a bmap operation on the inode.
233 */ 233 */
234#define XFS_IRECOVERY (1 << 11) 234#define XFS_IRECOVERY (1 << 11)
235#define XFS_ICOWBLOCKS (1 << 12)/* has the cowblocks tag set */
235 236
236/* 237/*
237 * Per-lifetime flags need to be reset when re-using a reclaimable inode during 238 * Per-lifetime flags need to be reset when re-using a reclaimable inode during
@@ -428,7 +429,7 @@ xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
428xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); 429xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip);
429 430
430int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, 431int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
431 xfs_nlink_t, dev_t, prid_t, int, 432 xfs_nlink_t, dev_t, prid_t,
432 struct xfs_inode **, int *); 433 struct xfs_inode **, int *);
433 434
434/* from xfs_file.c */ 435/* from xfs_file.c */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 33eb4fb2e3fd..7ab52a8bc0a9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1213,7 +1213,7 @@ xfs_xattr_iomap_begin(
1213 1213
1214 ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL); 1214 ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
1215 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, 1215 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
1216 &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK); 1216 &nimaps, XFS_BMAPI_ATTRFORK);
1217out_unlock: 1217out_unlock:
1218 xfs_iunlock(ip, lockmode); 1218 xfs_iunlock(ip, lockmode);
1219 1219
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 010a13a201aa..ec952dfad359 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -793,8 +793,8 @@ xfs_qm_qino_alloc(
793 return error; 793 return error;
794 794
795 if (need_alloc) { 795 if (need_alloc) {
796 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, 796 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip,
797 &committed); 797 &committed);
798 if (error) { 798 if (error) {
799 xfs_trans_cancel(tp); 799 xfs_trans_cancel(tp);
800 return error; 800 return error;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index cc041a29eb70..47aea2e82c26 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -49,8 +49,6 @@
49#include "xfs_alloc.h" 49#include "xfs_alloc.h"
50#include "xfs_quota_defs.h" 50#include "xfs_quota_defs.h"
51#include "xfs_quota.h" 51#include "xfs_quota.h"
52#include "xfs_btree.h"
53#include "xfs_bmap_btree.h"
54#include "xfs_reflink.h" 52#include "xfs_reflink.h"
55#include "xfs_iomap.h" 53#include "xfs_iomap.h"
56#include "xfs_rmap_btree.h" 54#include "xfs_rmap_btree.h"
@@ -456,6 +454,8 @@ retry:
456 if (error) 454 if (error)
457 goto out_bmap_cancel; 455 goto out_bmap_cancel;
458 456
457 xfs_inode_set_cowblocks_tag(ip);
458
459 /* Finish up. */ 459 /* Finish up. */
460 error = xfs_defer_finish(&tp, &dfops); 460 error = xfs_defer_finish(&tp, &dfops);
461 if (error) 461 if (error)
@@ -492,8 +492,9 @@ xfs_reflink_find_cow_mapping(
492 struct xfs_iext_cursor icur; 492 struct xfs_iext_cursor icur;
493 493
494 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); 494 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
495 ASSERT(xfs_is_reflink_inode(ip));
496 495
496 if (!xfs_is_reflink_inode(ip))
497 return false;
497 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 498 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
498 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got)) 499 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
499 return false; 500 return false;
@@ -612,6 +613,9 @@ xfs_reflink_cancel_cow_blocks(
612 613
613 /* Remove the mapping from the CoW fork. */ 614 /* Remove the mapping from the CoW fork. */
614 xfs_bmap_del_extent_cow(ip, &icur, &got, &del); 615 xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
616 } else {
617 /* Didn't do anything, push cursor back. */
618 xfs_iext_prev(ifp, &icur);
615 } 619 }
616next_extent: 620next_extent:
617 if (!xfs_iext_get_extent(ifp, &icur, &got)) 621 if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -727,7 +731,7 @@ xfs_reflink_end_cow(
727 (unsigned int)(end_fsb - offset_fsb), 731 (unsigned int)(end_fsb - offset_fsb),
728 XFS_DATA_FORK); 732 XFS_DATA_FORK);
729 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, 733 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
730 resblks, 0, 0, &tp); 734 resblks, 0, XFS_TRANS_RESERVE, &tp);
731 if (error) 735 if (error)
732 goto out; 736 goto out;
733 737
@@ -1293,6 +1297,17 @@ xfs_reflink_remap_range(
1293 1297
1294 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); 1298 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
1295 1299
1300 /*
1301 * Clear out post-eof preallocations because we don't have page cache
1302 * backing the delayed allocations and they'll never get freed on
1303 * their own.
1304 */
1305 if (xfs_can_free_eofblocks(dest, true)) {
1306 ret = xfs_free_eofblocks(dest);
1307 if (ret)
1308 goto out_unlock;
1309 }
1310
1296 /* Set flags and remap blocks. */ 1311 /* Set flags and remap blocks. */
1297 ret = xfs_reflink_set_inode_flag(src, dest); 1312 ret = xfs_reflink_set_inode_flag(src, dest);
1298 if (ret) 1313 if (ret)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 5122d3021117..1dacccc367f8 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1360,6 +1360,7 @@ xfs_fs_remount(
1360 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1360 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1361 return error; 1361 return error;
1362 } 1362 }
1363 xfs_queue_cowblocks(mp);
1363 1364
1364 /* Create the per-AG metadata reservation pool .*/ 1365 /* Create the per-AG metadata reservation pool .*/
1365 error = xfs_fs_reserve_ag_blocks(mp); 1366 error = xfs_fs_reserve_ag_blocks(mp);
@@ -1369,6 +1370,14 @@ xfs_fs_remount(
1369 1370
1370 /* rw -> ro */ 1371 /* rw -> ro */
1371 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) { 1372 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
1373 /* Get rid of any leftover CoW reservations... */
1374 cancel_delayed_work_sync(&mp->m_cowblocks_work);
1375 error = xfs_icache_free_cowblocks(mp, NULL);
1376 if (error) {
1377 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1378 return error;
1379 }
1380
1372 /* Free the per-AG metadata reservation pool. */ 1381 /* Free the per-AG metadata reservation pool. */
1373 error = xfs_fs_unreserve_ag_blocks(mp); 1382 error = xfs_fs_unreserve_ag_blocks(mp);
1374 if (error) { 1383 if (error) {
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 68d3ca2c4968..2e9e793a8f9d 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -232,11 +232,6 @@ xfs_symlink(
232 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); 232 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
233 233
234 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp); 234 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp);
235 if (error == -ENOSPC && fs_blocks == 0) {
236 resblks = 0;
237 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0,
238 &tp);
239 }
240 if (error) 235 if (error)
241 goto out_release_inode; 236 goto out_release_inode;
242 237
@@ -260,14 +255,6 @@ xfs_symlink(
260 goto out_trans_cancel; 255 goto out_trans_cancel;
261 256
262 /* 257 /*
263 * Check for ability to enter directory entry, if no space reserved.
264 */
265 if (!resblks) {
266 error = xfs_dir_canenter(tp, dp, link_name);
267 if (error)
268 goto out_trans_cancel;
269 }
270 /*
271 * Initialize the bmap freelist prior to calling either 258 * Initialize the bmap freelist prior to calling either
272 * bmapi or the directory create code. 259 * bmapi or the directory create code.
273 */ 260 */
@@ -277,7 +264,7 @@ xfs_symlink(
277 * Allocate an inode for the symlink. 264 * Allocate an inode for the symlink.
278 */ 265 */
279 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, 266 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
280 prid, resblks > 0, &ip, NULL); 267 prid, &ip, NULL);
281 if (error) 268 if (error)
282 goto out_trans_cancel; 269 goto out_trans_cancel;
283 270
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 5d95fe348294..35f3546b6af5 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -24,7 +24,6 @@
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_defer.h" 25#include "xfs_defer.h"
26#include "xfs_da_format.h" 26#include "xfs_da_format.h"
27#include "xfs_defer.h"
28#include "xfs_inode.h" 27#include "xfs_inode.h"
29#include "xfs_btree.h" 28#include "xfs_btree.h"
30#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h
index ea189d88a3cc..8ac4e68a12f0 100644
--- a/include/asm-generic/mm_hooks.h
+++ b/include/asm-generic/mm_hooks.h
@@ -7,9 +7,10 @@
7#ifndef _ASM_GENERIC_MM_HOOKS_H 7#ifndef _ASM_GENERIC_MM_HOOKS_H
8#define _ASM_GENERIC_MM_HOOKS_H 8#define _ASM_GENERIC_MM_HOOKS_H
9 9
10static inline void arch_dup_mmap(struct mm_struct *oldmm, 10static inline int arch_dup_mmap(struct mm_struct *oldmm,
11 struct mm_struct *mm) 11 struct mm_struct *mm)
12{ 12{
13 return 0;
13} 14}
14 15
15static inline void arch_exit_mmap(struct mm_struct *mm) 16static inline void arch_exit_mmap(struct mm_struct *mm)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index b234d54f2cb6..868e68561f91 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
1025struct file; 1025struct file;
1026int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, 1026int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
1027 unsigned long size, pgprot_t *vma_prot); 1027 unsigned long size, pgprot_t *vma_prot);
1028
1029#ifndef CONFIG_X86_ESPFIX64
1030static inline void init_espfix_bsp(void) { }
1031#endif
1032
1028#endif /* !__ASSEMBLY__ */ 1033#endif /* !__ASSEMBLY__ */
1029 1034
1030#ifndef io_remap_pfn_range 1035#ifndef io_remap_pfn_range
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index f0b44c16e88f..c2bae8da642c 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -82,6 +82,14 @@ int ahash_register_instance(struct crypto_template *tmpl,
82 struct ahash_instance *inst); 82 struct ahash_instance *inst);
83void ahash_free_instance(struct crypto_instance *inst); 83void ahash_free_instance(struct crypto_instance *inst);
84 84
85int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
86 unsigned int keylen);
87
88static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
89{
90 return alg->setkey != shash_no_setkey;
91}
92
85int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn, 93int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn,
86 struct hash_alg_common *alg, 94 struct hash_alg_common *alg,
87 struct crypto_instance *inst); 95 struct crypto_instance *inst);
diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h
index cceafa01f907..b67404fc4b34 100644
--- a/include/crypto/mcryptd.h
+++ b/include/crypto/mcryptd.h
@@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast(
27 27
28struct mcryptd_cpu_queue { 28struct mcryptd_cpu_queue {
29 struct crypto_queue queue; 29 struct crypto_queue queue;
30 spinlock_t q_lock;
30 struct work_struct work; 31 struct work_struct work;
31}; 32};
32 33
diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index a4649c56ca2f..5971577016a2 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -24,6 +24,7 @@
24#define __DRM_CONNECTOR_H__ 24#define __DRM_CONNECTOR_H__
25 25
26#include <linux/list.h> 26#include <linux/list.h>
27#include <linux/llist.h>
27#include <linux/ctype.h> 28#include <linux/ctype.h>
28#include <linux/hdmi.h> 29#include <linux/hdmi.h>
29#include <drm/drm_mode_object.h> 30#include <drm/drm_mode_object.h>
@@ -918,12 +919,13 @@ struct drm_connector {
918 uint16_t tile_h_size, tile_v_size; 919 uint16_t tile_h_size, tile_v_size;
919 920
920 /** 921 /**
921 * @free_work: 922 * @free_node:
922 * 923 *
923 * Work used only by &drm_connector_iter to be able to clean up a 924 * List used only by &drm_connector_iter to be able to clean up a
924 * connector from any context. 925 * connector from any context, in conjunction with
926 * &drm_mode_config.connector_free_work.
925 */ 927 */
926 struct work_struct free_work; 928 struct llist_node free_node;
927}; 929};
928 930
929#define obj_to_connector(x) container_of(x, struct drm_connector, base) 931#define obj_to_connector(x) container_of(x, struct drm_connector, base)
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index 2ec41d032e56..efe6d5a8e834 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -465,6 +465,8 @@ struct edid *drm_get_edid(struct drm_connector *connector,
465struct edid *drm_get_edid_switcheroo(struct drm_connector *connector, 465struct edid *drm_get_edid_switcheroo(struct drm_connector *connector,
466 struct i2c_adapter *adapter); 466 struct i2c_adapter *adapter);
467struct edid *drm_edid_duplicate(const struct edid *edid); 467struct edid *drm_edid_duplicate(const struct edid *edid);
468void drm_reset_display_info(struct drm_connector *connector);
469u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid);
468int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid); 470int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid);
469 471
470u8 drm_match_cea_mode(const struct drm_display_mode *to_match); 472u8 drm_match_cea_mode(const struct drm_display_mode *to_match);
diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index b21e827c5c78..b0ce26d71296 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -27,6 +27,7 @@
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/idr.h> 28#include <linux/idr.h>
29#include <linux/workqueue.h> 29#include <linux/workqueue.h>
30#include <linux/llist.h>
30 31
31#include <drm/drm_modeset_lock.h> 32#include <drm/drm_modeset_lock.h>
32 33
@@ -393,7 +394,7 @@ struct drm_mode_config {
393 394
394 /** 395 /**
395 * @connector_list_lock: Protects @num_connector and 396 * @connector_list_lock: Protects @num_connector and
396 * @connector_list. 397 * @connector_list and @connector_free_list.
397 */ 398 */
398 spinlock_t connector_list_lock; 399 spinlock_t connector_list_lock;
399 /** 400 /**
@@ -414,6 +415,21 @@ struct drm_mode_config {
414 */ 415 */
415 struct list_head connector_list; 416 struct list_head connector_list;
416 /** 417 /**
418 * @connector_free_list:
419 *
420 * List of connector objects linked with &drm_connector.free_head.
421 * Protected by @connector_list_lock. Used by
422 * drm_for_each_connector_iter() and
423 * &struct drm_connector_list_iter to savely free connectors using
424 * @connector_free_work.
425 */
426 struct llist_head connector_free_list;
427 /**
428 * @connector_free_work: Work to clean up @connector_free_list.
429 */
430 struct work_struct connector_free_work;
431
432 /**
417 * @num_encoder: 433 * @num_encoder:
418 * 434 *
419 * Number of encoders on this device. This is invariant over the 435 * Number of encoders on this device. This is invariant over the
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 6e45608b2399..9da6ce22803f 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -62,7 +62,7 @@ struct arch_timer_cpu {
62 bool enabled; 62 bool enabled;
63}; 63};
64 64
65int kvm_timer_hyp_init(void); 65int kvm_timer_hyp_init(bool);
66int kvm_timer_enable(struct kvm_vcpu *vcpu); 66int kvm_timer_enable(struct kvm_vcpu *vcpu);
67int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); 67int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
68void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); 68void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 82f0c8fd7be8..23d29b39f71e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx);
492 492
493#define bio_set_dev(bio, bdev) \ 493#define bio_set_dev(bio, bdev) \
494do { \ 494do { \
495 if ((bio)->bi_disk != (bdev)->bd_disk) \
496 bio_clear_flag(bio, BIO_THROTTLED);\
495 (bio)->bi_disk = (bdev)->bd_disk; \ 497 (bio)->bi_disk = (bdev)->bd_disk; \
496 (bio)->bi_partno = (bdev)->bd_partno; \ 498 (bio)->bi_partno = (bdev)->bd_partno; \
497} while (0) 499} while (0)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a1e628e032da..9e7d8bd776d2 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -50,8 +50,6 @@ struct blk_issue_stat {
50struct bio { 50struct bio {
51 struct bio *bi_next; /* request queue link */ 51 struct bio *bi_next; /* request queue link */
52 struct gendisk *bi_disk; 52 struct gendisk *bi_disk;
53 u8 bi_partno;
54 blk_status_t bi_status;
55 unsigned int bi_opf; /* bottom bits req flags, 53 unsigned int bi_opf; /* bottom bits req flags,
56 * top bits REQ_OP. Use 54 * top bits REQ_OP. Use
57 * accessors. 55 * accessors.
@@ -59,8 +57,8 @@ struct bio {
59 unsigned short bi_flags; /* status, etc and bvec pool number */ 57 unsigned short bi_flags; /* status, etc and bvec pool number */
60 unsigned short bi_ioprio; 58 unsigned short bi_ioprio;
61 unsigned short bi_write_hint; 59 unsigned short bi_write_hint;
62 60 blk_status_t bi_status;
63 struct bvec_iter bi_iter; 61 u8 bi_partno;
64 62
65 /* Number of segments in this BIO after 63 /* Number of segments in this BIO after
66 * physical address coalescing is performed. 64 * physical address coalescing is performed.
@@ -74,8 +72,9 @@ struct bio {
74 unsigned int bi_seg_front_size; 72 unsigned int bi_seg_front_size;
75 unsigned int bi_seg_back_size; 73 unsigned int bi_seg_back_size;
76 74
77 atomic_t __bi_remaining; 75 struct bvec_iter bi_iter;
78 76
77 atomic_t __bi_remaining;
79 bio_end_io_t *bi_end_io; 78 bio_end_io_t *bi_end_io;
80 79
81 void *bi_private; 80 void *bi_private;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8089ca17db9a..0ce8a372d506 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t;
135struct request { 135struct request {
136 struct list_head queuelist; 136 struct list_head queuelist;
137 union { 137 union {
138 call_single_data_t csd; 138 struct __call_single_data csd;
139 u64 fifo_time; 139 u64 fifo_time;
140 }; 140 };
141 141
@@ -241,14 +241,24 @@ struct request {
241 struct request *next_rq; 241 struct request *next_rq;
242}; 242};
243 243
244static inline bool blk_op_is_scsi(unsigned int op)
245{
246 return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
247}
248
249static inline bool blk_op_is_private(unsigned int op)
250{
251 return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
252}
253
244static inline bool blk_rq_is_scsi(struct request *rq) 254static inline bool blk_rq_is_scsi(struct request *rq)
245{ 255{
246 return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT; 256 return blk_op_is_scsi(req_op(rq));
247} 257}
248 258
249static inline bool blk_rq_is_private(struct request *rq) 259static inline bool blk_rq_is_private(struct request *rq)
250{ 260{
251 return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT; 261 return blk_op_is_private(req_op(rq));
252} 262}
253 263
254static inline bool blk_rq_is_passthrough(struct request *rq) 264static inline bool blk_rq_is_passthrough(struct request *rq)
@@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
256 return blk_rq_is_scsi(rq) || blk_rq_is_private(rq); 266 return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
257} 267}
258 268
269static inline bool bio_is_passthrough(struct bio *bio)
270{
271 unsigned op = bio_op(bio);
272
273 return blk_op_is_scsi(op) || blk_op_is_private(op);
274}
275
259static inline unsigned short req_get_ioprio(struct request *req) 276static inline unsigned short req_get_ioprio(struct request *req)
260{ 277{
261 return req->ioprio; 278 return req->ioprio;
@@ -948,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
948extern void blk_rq_unprep_clone(struct request *rq); 965extern void blk_rq_unprep_clone(struct request *rq);
949extern blk_status_t blk_insert_cloned_request(struct request_queue *q, 966extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
950 struct request *rq); 967 struct request *rq);
951extern int blk_rq_append_bio(struct request *rq, struct bio *bio); 968extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
952extern void blk_delay_queue(struct request_queue *, unsigned long); 969extern void blk_delay_queue(struct request_queue *, unsigned long);
953extern void blk_queue_split(struct request_queue *, struct bio **); 970extern void blk_queue_split(struct request_queue *, struct bio **);
954extern void blk_recount_segments(struct request_queue *, struct bio *); 971extern void blk_recount_segments(struct request_queue *, struct bio *);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c561b986bab0..1632bb13ad8a 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -15,11 +15,11 @@
15 * In practice this is far bigger than any realistic pointer offset; this limit 15 * In practice this is far bigger than any realistic pointer offset; this limit
16 * ensures that umax_value + (int)off + (int)size cannot overflow a u64. 16 * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
17 */ 17 */
18#define BPF_MAX_VAR_OFF (1ULL << 31) 18#define BPF_MAX_VAR_OFF (1 << 29)
19/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures 19/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures
20 * that converting umax_value to int cannot overflow. 20 * that converting umax_value to int cannot overflow.
21 */ 21 */
22#define BPF_MAX_VAR_SIZ INT_MAX 22#define BPF_MAX_VAR_SIZ (1 << 29)
23 23
24/* Liveness marks, used for registers and spilled-regs (in stack slots). 24/* Liveness marks, used for registers and spilled-regs (in stack slots).
25 * Read marks propagate upwards until they find a write mark; they record that 25 * Read marks propagate upwards until they find a write mark; they record that
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 188ed9f65517..52e611ab9a6c 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -220,21 +220,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
220/* 220/*
221 * Prevent the compiler from merging or refetching reads or writes. The 221 * Prevent the compiler from merging or refetching reads or writes. The
222 * compiler is also forbidden from reordering successive instances of 222 * compiler is also forbidden from reordering successive instances of
223 * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the 223 * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
224 * compiler is aware of some particular ordering. One way to make the 224 * particular ordering. One way to make the compiler aware of ordering is to
225 * compiler aware of ordering is to put the two invocations of READ_ONCE, 225 * put the two invocations of READ_ONCE or WRITE_ONCE in different C
226 * WRITE_ONCE or ACCESS_ONCE() in different C statements. 226 * statements.
227 * 227 *
228 * In contrast to ACCESS_ONCE these two macros will also work on aggregate 228 * These two macros will also work on aggregate data types like structs or
229 * data types like structs or unions. If the size of the accessed data 229 * unions. If the size of the accessed data type exceeds the word size of
230 * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) 230 * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will
231 * READ_ONCE() and WRITE_ONCE() will fall back to memcpy(). There's at 231 * fall back to memcpy(). There's at least two memcpy()s: one for the
232 * least two memcpy()s: one for the __builtin_memcpy() and then one for 232 * __builtin_memcpy() and then one for the macro doing the copy of variable
233 * the macro doing the copy of variable - '__u' allocated on the stack. 233 * - '__u' allocated on the stack.
234 * 234 *
235 * Their two major use cases are: (1) Mediating communication between 235 * Their two major use cases are: (1) Mediating communication between
236 * process-level code and irq/NMI handlers, all running on the same CPU, 236 * process-level code and irq/NMI handlers, all running on the same CPU,
237 * and (2) Ensuring that the compiler does not fold, spindle, or otherwise 237 * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
238 * mutilate accesses that either do not require ordering or that interact 238 * mutilate accesses that either do not require ordering or that interact
239 * with an explicit memory barrier or atomic instruction that provides the 239 * with an explicit memory barrier or atomic instruction that provides the
240 * required ordering. 240 * required ordering.
@@ -327,29 +327,4 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
327 compiletime_assert(__native_word(t), \ 327 compiletime_assert(__native_word(t), \
328 "Need native word sized stores/loads for atomicity.") 328 "Need native word sized stores/loads for atomicity.")
329 329
330/*
331 * Prevent the compiler from merging or refetching accesses. The compiler
332 * is also forbidden from reordering successive instances of ACCESS_ONCE(),
333 * but only when the compiler is aware of some particular ordering. One way
334 * to make the compiler aware of ordering is to put the two invocations of
335 * ACCESS_ONCE() in different C statements.
336 *
337 * ACCESS_ONCE will only work on scalar types. For union types, ACCESS_ONCE
338 * on a union member will work as long as the size of the member matches the
339 * size of the union and the size is smaller than word size.
340 *
341 * The major use cases of ACCESS_ONCE used to be (1) Mediating communication
342 * between process-level code and irq/NMI handlers, all running on the same CPU,
343 * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
344 * mutilate accesses that either do not require ordering or that interact
345 * with an explicit memory barrier or atomic instruction that provides the
346 * required ordering.
347 *
348 * If possible use READ_ONCE()/WRITE_ONCE() instead.
349 */
350#define __ACCESS_ONCE(x) ({ \
351 __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \
352 (volatile typeof(x) *)&(x); })
353#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))
354
355#endif /* __LINUX_COMPILER_H */ 330#endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 0662a417febe..94a59ba7d422 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -10,9 +10,6 @@
10 */ 10 */
11 11
12#include <linux/wait.h> 12#include <linux/wait.h>
13#ifdef CONFIG_LOCKDEP_COMPLETIONS
14#include <linux/lockdep.h>
15#endif
16 13
17/* 14/*
18 * struct completion - structure used to maintain state for a "completion" 15 * struct completion - structure used to maintain state for a "completion"
@@ -29,58 +26,16 @@
29struct completion { 26struct completion {
30 unsigned int done; 27 unsigned int done;
31 wait_queue_head_t wait; 28 wait_queue_head_t wait;
32#ifdef CONFIG_LOCKDEP_COMPLETIONS
33 struct lockdep_map_cross map;
34#endif
35}; 29};
36 30
37#ifdef CONFIG_LOCKDEP_COMPLETIONS
38static inline void complete_acquire(struct completion *x)
39{
40 lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_);
41}
42
43static inline void complete_release(struct completion *x)
44{
45 lock_release((struct lockdep_map *)&x->map, 0, _RET_IP_);
46}
47
48static inline void complete_release_commit(struct completion *x)
49{
50 lock_commit_crosslock((struct lockdep_map *)&x->map);
51}
52
53#define init_completion_map(x, m) \
54do { \
55 lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \
56 (m)->name, (m)->key, 0); \
57 __init_completion(x); \
58} while (0)
59
60#define init_completion(x) \
61do { \
62 static struct lock_class_key __key; \
63 lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \
64 "(completion)" #x, \
65 &__key, 0); \
66 __init_completion(x); \
67} while (0)
68#else
69#define init_completion_map(x, m) __init_completion(x) 31#define init_completion_map(x, m) __init_completion(x)
70#define init_completion(x) __init_completion(x) 32#define init_completion(x) __init_completion(x)
71static inline void complete_acquire(struct completion *x) {} 33static inline void complete_acquire(struct completion *x) {}
72static inline void complete_release(struct completion *x) {} 34static inline void complete_release(struct completion *x) {}
73static inline void complete_release_commit(struct completion *x) {} 35static inline void complete_release_commit(struct completion *x) {}
74#endif
75 36
76#ifdef CONFIG_LOCKDEP_COMPLETIONS
77#define COMPLETION_INITIALIZER(work) \
78 { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \
79 STATIC_CROSS_LOCKDEP_MAP_INIT("(completion)" #work, &(work)) }
80#else
81#define COMPLETION_INITIALIZER(work) \ 37#define COMPLETION_INITIALIZER(work) \
82 { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } 38 { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
83#endif
84 39
85#define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \ 40#define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \
86 (*({ init_completion_map(&(work), &(map)); &(work); })) 41 (*({ init_completion_map(&(work), &(map)); &(work); }))
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 201ab7267986..1a32e558eb11 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -86,7 +86,7 @@ enum cpuhp_state {
86 CPUHP_MM_ZSWP_POOL_PREPARE, 86 CPUHP_MM_ZSWP_POOL_PREPARE,
87 CPUHP_KVM_PPC_BOOK3S_PREPARE, 87 CPUHP_KVM_PPC_BOOK3S_PREPARE,
88 CPUHP_ZCOMP_PREPARE, 88 CPUHP_ZCOMP_PREPARE,
89 CPUHP_TIMERS_DEAD, 89 CPUHP_TIMERS_PREPARE,
90 CPUHP_MIPS_SOC_PREPARE, 90 CPUHP_MIPS_SOC_PREPARE,
91 CPUHP_BP_PREPARE_DYN, 91 CPUHP_BP_PREPARE_DYN,
92 CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, 92 CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20,
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 099058e1178b..631286535d0f 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -83,6 +83,7 @@ extern int set_current_groups(struct group_info *);
83extern void set_groups(struct cred *, struct group_info *); 83extern void set_groups(struct cred *, struct group_info *);
84extern int groups_search(const struct group_info *, kgid_t); 84extern int groups_search(const struct group_info *, kgid_t);
85extern bool may_setgroups(void); 85extern bool may_setgroups(void);
86extern void groups_sort(struct group_info *);
86 87
87/* 88/*
88 * The security context of a task 89 * The security context of a task
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 55e672592fa9..7258cd676df4 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -66,9 +66,10 @@ struct gpio_irq_chip {
66 /** 66 /**
67 * @lock_key: 67 * @lock_key:
68 * 68 *
69 * Per GPIO IRQ chip lockdep class. 69 * Per GPIO IRQ chip lockdep classes.
70 */ 70 */
71 struct lock_class_key *lock_key; 71 struct lock_class_key *lock_key;
72 struct lock_class_key *request_key;
72 73
73 /** 74 /**
74 * @parent_handler: 75 * @parent_handler:
@@ -323,7 +324,8 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip,
323 324
324/* add/remove chips */ 325/* add/remove chips */
325extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, 326extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
326 struct lock_class_key *lock_key); 327 struct lock_class_key *lock_key,
328 struct lock_class_key *request_key);
327 329
328/** 330/**
329 * gpiochip_add_data() - register a gpio_chip 331 * gpiochip_add_data() - register a gpio_chip
@@ -350,11 +352,13 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
350 */ 352 */
351#ifdef CONFIG_LOCKDEP 353#ifdef CONFIG_LOCKDEP
352#define gpiochip_add_data(chip, data) ({ \ 354#define gpiochip_add_data(chip, data) ({ \
353 static struct lock_class_key key; \ 355 static struct lock_class_key lock_key; \
354 gpiochip_add_data_with_key(chip, data, &key); \ 356 static struct lock_class_key request_key; \
357 gpiochip_add_data_with_key(chip, data, &lock_key, \
358 &request_key); \
355 }) 359 })
356#else 360#else
357#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL) 361#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL)
358#endif 362#endif
359 363
360static inline int gpiochip_add(struct gpio_chip *chip) 364static inline int gpiochip_add(struct gpio_chip *chip)
@@ -429,7 +433,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
429 irq_flow_handler_t handler, 433 irq_flow_handler_t handler,
430 unsigned int type, 434 unsigned int type,
431 bool threaded, 435 bool threaded,
432 struct lock_class_key *lock_key); 436 struct lock_class_key *lock_key,
437 struct lock_class_key *request_key);
433 438
434#ifdef CONFIG_LOCKDEP 439#ifdef CONFIG_LOCKDEP
435 440
@@ -445,10 +450,12 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
445 irq_flow_handler_t handler, 450 irq_flow_handler_t handler,
446 unsigned int type) 451 unsigned int type)
447{ 452{
448 static struct lock_class_key key; 453 static struct lock_class_key lock_key;
454 static struct lock_class_key request_key;
449 455
450 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, 456 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
451 handler, type, false, &key); 457 handler, type, false,
458 &lock_key, &request_key);
452} 459}
453 460
454static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, 461static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
@@ -458,10 +465,12 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
458 unsigned int type) 465 unsigned int type)
459{ 466{
460 467
461 static struct lock_class_key key; 468 static struct lock_class_key lock_key;
469 static struct lock_class_key request_key;
462 470
463 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, 471 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
464 handler, type, true, &key); 472 handler, type, true,
473 &lock_key, &request_key);
465} 474}
466#else 475#else
467static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, 476static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
@@ -471,7 +480,7 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
471 unsigned int type) 480 unsigned int type)
472{ 481{
473 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, 482 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
474 handler, type, false, NULL); 483 handler, type, false, NULL, NULL);
475} 484}
476 485
477static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, 486static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
@@ -481,7 +490,7 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
481 unsigned int type) 490 unsigned int type)
482{ 491{
483 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, 492 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
484 handler, type, true, NULL); 493 handler, type, true, NULL, NULL);
485} 494}
486#endif /* CONFIG_LOCKDEP */ 495#endif /* CONFIG_LOCKDEP */
487 496
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 7c3a365f7e12..fa14f834e4ed 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -15,6 +15,7 @@
15#include <linux/radix-tree.h> 15#include <linux/radix-tree.h>
16#include <linux/gfp.h> 16#include <linux/gfp.h>
17#include <linux/percpu.h> 17#include <linux/percpu.h>
18#include <linux/bug.h>
18 19
19struct idr { 20struct idr {
20 struct radix_tree_root idr_rt; 21 struct radix_tree_root idr_rt;
diff --git a/include/linux/intel-pti.h b/include/linux/intel-pti.h
new file mode 100644
index 000000000000..2710d72de3c9
--- /dev/null
+++ b/include/linux/intel-pti.h
@@ -0,0 +1,43 @@
1/*
2 * Copyright (C) Intel 2011
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
14 *
15 * The PTI (Parallel Trace Interface) driver directs trace data routed from
16 * various parts in the system out through the Intel Penwell PTI port and
17 * out of the mobile device for analysis with a debugging tool
18 * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
19 * compact JTAG, standard.
20 *
21 * This header file will allow other parts of the OS to use the
22 * interface to write out it's contents for debugging a mobile system.
23 */
24
25#ifndef LINUX_INTEL_PTI_H_
26#define LINUX_INTEL_PTI_H_
27
28/* offset for last dword of any PTI message. Part of MIPI P1149.7 */
29#define PTI_LASTDWORD_DTS 0x30
30
31/* basic structure used as a write address to the PTI HW */
32struct pti_masterchannel {
33 u8 master;
34 u8 channel;
35};
36
37/* the following functions are defined in misc/pti.c */
38void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count);
39struct pti_masterchannel *pti_request_masterchannel(u8 type,
40 const char *thread_name);
41void pti_release_masterchannel(struct pti_masterchannel *mc);
42
43#endif /* LINUX_INTEL_PTI_H_ */
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index cb18c6290ca8..8415bf1a9776 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -273,7 +273,8 @@ struct ipv6_pinfo {
273 * 100: prefer care-of address 273 * 100: prefer care-of address
274 */ 274 */
275 dontfrag:1, 275 dontfrag:1,
276 autoflowlabel:1; 276 autoflowlabel:1,
277 autoflowlabel_set:1;
277 __u8 min_hopcount; 278 __u8 min_hopcount;
278 __u8 tclass; 279 __u8 tclass;
279 __be32 rcv_flowinfo; 280 __be32 rcv_flowinfo;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index e140f69163b6..a0231e96a578 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -212,6 +212,7 @@ struct irq_data {
212 * mask. Applies only to affinity managed irqs. 212 * mask. Applies only to affinity managed irqs.
213 * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target 213 * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target
214 * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set 214 * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set
215 * IRQD_CAN_RESERVE - Can use reservation mode
215 */ 216 */
216enum { 217enum {
217 IRQD_TRIGGER_MASK = 0xf, 218 IRQD_TRIGGER_MASK = 0xf,
@@ -233,6 +234,7 @@ enum {
233 IRQD_MANAGED_SHUTDOWN = (1 << 23), 234 IRQD_MANAGED_SHUTDOWN = (1 << 23),
234 IRQD_SINGLE_TARGET = (1 << 24), 235 IRQD_SINGLE_TARGET = (1 << 24),
235 IRQD_DEFAULT_TRIGGER_SET = (1 << 25), 236 IRQD_DEFAULT_TRIGGER_SET = (1 << 25),
237 IRQD_CAN_RESERVE = (1 << 26),
236}; 238};
237 239
238#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) 240#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -377,6 +379,21 @@ static inline bool irqd_is_managed_and_shutdown(struct irq_data *d)
377 return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; 379 return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN;
378} 380}
379 381
382static inline void irqd_set_can_reserve(struct irq_data *d)
383{
384 __irqd_to_state(d) |= IRQD_CAN_RESERVE;
385}
386
387static inline void irqd_clr_can_reserve(struct irq_data *d)
388{
389 __irqd_to_state(d) &= ~IRQD_CAN_RESERVE;
390}
391
392static inline bool irqd_can_reserve(struct irq_data *d)
393{
394 return __irqd_to_state(d) & IRQD_CAN_RESERVE;
395}
396
380#undef __irqd_to_state 397#undef __irqd_to_state
381 398
382static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) 399static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 39fb3700f7a9..25b33b664537 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -255,12 +255,15 @@ static inline bool irq_is_percpu_devid(unsigned int irq)
255} 255}
256 256
257static inline void 257static inline void
258irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class) 258irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
259 struct lock_class_key *request_class)
259{ 260{
260 struct irq_desc *desc = irq_to_desc(irq); 261 struct irq_desc *desc = irq_to_desc(irq);
261 262
262 if (desc) 263 if (desc) {
263 lockdep_set_class(&desc->lock, class); 264 lockdep_set_class(&desc->lock, lock_class);
265 lockdep_set_class(&desc->request_mutex, request_class);
266 }
264} 267}
265 268
266#ifdef CONFIG_IRQ_PREFLOW_FASTEOI 269#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index a34355d19546..48c7e86bb556 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -113,7 +113,7 @@ struct irq_domain_ops {
113 unsigned int nr_irqs, void *arg); 113 unsigned int nr_irqs, void *arg);
114 void (*free)(struct irq_domain *d, unsigned int virq, 114 void (*free)(struct irq_domain *d, unsigned int virq,
115 unsigned int nr_irqs); 115 unsigned int nr_irqs);
116 int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool early); 116 int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve);
117 void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); 117 void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data);
118 int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, 118 int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec,
119 unsigned long *out_hwirq, unsigned int *out_type); 119 unsigned long *out_hwirq, unsigned int *out_type);
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index a842551fe044..2e75dc34bff5 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -158,12 +158,6 @@ struct lockdep_map {
158 int cpu; 158 int cpu;
159 unsigned long ip; 159 unsigned long ip;
160#endif 160#endif
161#ifdef CONFIG_LOCKDEP_CROSSRELEASE
162 /*
163 * Whether it's a crosslock.
164 */
165 int cross;
166#endif
167}; 161};
168 162
169static inline void lockdep_copy_map(struct lockdep_map *to, 163static inline void lockdep_copy_map(struct lockdep_map *to,
@@ -267,96 +261,9 @@ struct held_lock {
267 unsigned int hardirqs_off:1; 261 unsigned int hardirqs_off:1;
268 unsigned int references:12; /* 32 bits */ 262 unsigned int references:12; /* 32 bits */
269 unsigned int pin_count; 263 unsigned int pin_count;
270#ifdef CONFIG_LOCKDEP_CROSSRELEASE
271 /*
272 * Generation id.
273 *
274 * A value of cross_gen_id will be stored when holding this,
275 * which is globally increased whenever each crosslock is held.
276 */
277 unsigned int gen_id;
278#endif
279};
280
281#ifdef CONFIG_LOCKDEP_CROSSRELEASE
282#define MAX_XHLOCK_TRACE_ENTRIES 5
283
284/*
285 * This is for keeping locks waiting for commit so that true dependencies
286 * can be added at commit step.
287 */
288struct hist_lock {
289 /*
290 * Id for each entry in the ring buffer. This is used to
291 * decide whether the ring buffer was overwritten or not.
292 *
293 * For example,
294 *
295 * |<----------- hist_lock ring buffer size ------->|
296 * pppppppppppppppppppppiiiiiiiiiiiiiiiiiiiiiiiiiiiii
297 * wrapped > iiiiiiiiiiiiiiiiiiiiiiiiiii.......................
298 *
299 * where 'p' represents an acquisition in process
300 * context, 'i' represents an acquisition in irq
301 * context.
302 *
303 * In this example, the ring buffer was overwritten by
304 * acquisitions in irq context, that should be detected on
305 * rollback or commit.
306 */
307 unsigned int hist_id;
308
309 /*
310 * Seperate stack_trace data. This will be used at commit step.
311 */
312 struct stack_trace trace;
313 unsigned long trace_entries[MAX_XHLOCK_TRACE_ENTRIES];
314
315 /*
316 * Seperate hlock instance. This will be used at commit step.
317 *
318 * TODO: Use a smaller data structure containing only necessary
319 * data. However, we should make lockdep code able to handle the
320 * smaller one first.
321 */
322 struct held_lock hlock;
323}; 264};
324 265
325/* 266/*
326 * To initialize a lock as crosslock, lockdep_init_map_crosslock() should
327 * be called instead of lockdep_init_map().
328 */
329struct cross_lock {
330 /*
331 * When more than one acquisition of crosslocks are overlapped,
332 * we have to perform commit for them based on cross_gen_id of
333 * the first acquisition, which allows us to add more true
334 * dependencies.
335 *
336 * Moreover, when no acquisition of a crosslock is in progress,
337 * we should not perform commit because the lock might not exist
338 * any more, which might cause incorrect memory access. So we
339 * have to track the number of acquisitions of a crosslock.
340 */
341 int nr_acquire;
342
343 /*
344 * Seperate hlock instance. This will be used at commit step.
345 *
346 * TODO: Use a smaller data structure containing only necessary
347 * data. However, we should make lockdep code able to handle the
348 * smaller one first.
349 */
350 struct held_lock hlock;
351};
352
353struct lockdep_map_cross {
354 struct lockdep_map map;
355 struct cross_lock xlock;
356};
357#endif
358
359/*
360 * Initialization, self-test and debugging-output methods: 267 * Initialization, self-test and debugging-output methods:
361 */ 268 */
362extern void lockdep_info(void); 269extern void lockdep_info(void);
@@ -560,37 +467,6 @@ enum xhlock_context_t {
560 XHLOCK_CTX_NR, 467 XHLOCK_CTX_NR,
561}; 468};
562 469
563#ifdef CONFIG_LOCKDEP_CROSSRELEASE
564extern void lockdep_init_map_crosslock(struct lockdep_map *lock,
565 const char *name,
566 struct lock_class_key *key,
567 int subclass);
568extern void lock_commit_crosslock(struct lockdep_map *lock);
569
570/*
571 * What we essencially have to initialize is 'nr_acquire'. Other members
572 * will be initialized in add_xlock().
573 */
574#define STATIC_CROSS_LOCK_INIT() \
575 { .nr_acquire = 0,}
576
577#define STATIC_CROSS_LOCKDEP_MAP_INIT(_name, _key) \
578 { .map.name = (_name), .map.key = (void *)(_key), \
579 .map.cross = 1, .xlock = STATIC_CROSS_LOCK_INIT(), }
580
581/*
582 * To initialize a lockdep_map statically use this macro.
583 * Note that _name must not be NULL.
584 */
585#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
586 { .name = (_name), .key = (void *)(_key), .cross = 0, }
587
588extern void crossrelease_hist_start(enum xhlock_context_t c);
589extern void crossrelease_hist_end(enum xhlock_context_t c);
590extern void lockdep_invariant_state(bool force);
591extern void lockdep_init_task(struct task_struct *task);
592extern void lockdep_free_task(struct task_struct *task);
593#else /* !CROSSRELEASE */
594#define lockdep_init_map_crosslock(m, n, k, s) do {} while (0) 470#define lockdep_init_map_crosslock(m, n, k, s) do {} while (0)
595/* 471/*
596 * To initialize a lockdep_map statically use this macro. 472 * To initialize a lockdep_map statically use this macro.
@@ -604,7 +480,6 @@ static inline void crossrelease_hist_end(enum xhlock_context_t c) {}
604static inline void lockdep_invariant_state(bool force) {} 480static inline void lockdep_invariant_state(bool force) {}
605static inline void lockdep_init_task(struct task_struct *task) {} 481static inline void lockdep_init_task(struct task_struct *task) {}
606static inline void lockdep_free_task(struct task_struct *task) {} 482static inline void lockdep_free_task(struct task_struct *task) {}
607#endif /* CROSSRELEASE */
608 483
609#ifdef CONFIG_LOCK_STAT 484#ifdef CONFIG_LOCK_STAT
610 485
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index a2a1318a3d0c..c3d3f04d8cc6 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -915,10 +915,10 @@ enum PDEV_STAT {PDEV_STAT_IDLE, PDEV_STAT_RUN};
915#define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6) 915#define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6)
916 916
917enum dev_aspm_mode { 917enum dev_aspm_mode {
918 DEV_ASPM_DISABLE = 0,
919 DEV_ASPM_DYNAMIC, 918 DEV_ASPM_DYNAMIC,
920 DEV_ASPM_BACKDOOR, 919 DEV_ASPM_BACKDOOR,
921 DEV_ASPM_STATIC, 920 DEV_ASPM_STATIC,
921 DEV_ASPM_DISABLE,
922}; 922};
923 923
924/* 924/*
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a886b51511ab..1f509d072026 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -556,6 +556,7 @@ struct mlx5_core_sriov {
556}; 556};
557 557
558struct mlx5_irq_info { 558struct mlx5_irq_info {
559 cpumask_var_t mask;
559 char name[MLX5_MAX_IRQ_NAME]; 560 char name[MLX5_MAX_IRQ_NAME];
560}; 561};
561 562
@@ -1048,7 +1049,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
1048 enum mlx5_eq_type type); 1049 enum mlx5_eq_type type);
1049int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); 1050int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
1050int mlx5_start_eqs(struct mlx5_core_dev *dev); 1051int mlx5_start_eqs(struct mlx5_core_dev *dev);
1051int mlx5_stop_eqs(struct mlx5_core_dev *dev); 1052void mlx5_stop_eqs(struct mlx5_core_dev *dev);
1052int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, 1053int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
1053 unsigned int *irqn); 1054 unsigned int *irqn);
1054int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); 1055int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1164,6 +1165,10 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
1164int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); 1165int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
1165bool mlx5_lag_is_active(struct mlx5_core_dev *dev); 1166bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
1166struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); 1167struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
1168int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
1169 u64 *values,
1170 int num_counters,
1171 size_t *offsets);
1167struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); 1172struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
1168void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); 1173void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
1169 1174
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 38a7577a9ce7..d44ec5f41d4a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -147,7 +147,7 @@ enum {
147 MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, 147 MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771,
148 MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, 148 MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772,
149 MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, 149 MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773,
150 MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, 150 MLX5_CMD_OP_SET_PP_RATE_LIMIT = 0x780,
151 MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, 151 MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781,
152 MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782, 152 MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782,
153 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783, 153 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783,
@@ -7239,7 +7239,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
7239 u8 vxlan_udp_port[0x10]; 7239 u8 vxlan_udp_port[0x10];
7240}; 7240};
7241 7241
7242struct mlx5_ifc_set_rate_limit_out_bits { 7242struct mlx5_ifc_set_pp_rate_limit_out_bits {
7243 u8 status[0x8]; 7243 u8 status[0x8];
7244 u8 reserved_at_8[0x18]; 7244 u8 reserved_at_8[0x18];
7245 7245
@@ -7248,7 +7248,7 @@ struct mlx5_ifc_set_rate_limit_out_bits {
7248 u8 reserved_at_40[0x40]; 7248 u8 reserved_at_40[0x40];
7249}; 7249};
7250 7250
7251struct mlx5_ifc_set_rate_limit_in_bits { 7251struct mlx5_ifc_set_pp_rate_limit_in_bits {
7252 u8 opcode[0x10]; 7252 u8 opcode[0x10];
7253 u8 reserved_at_10[0x10]; 7253 u8 reserved_at_10[0x10];
7254 7254
@@ -7261,6 +7261,8 @@ struct mlx5_ifc_set_rate_limit_in_bits {
7261 u8 reserved_at_60[0x20]; 7261 u8 reserved_at_60[0x20];
7262 7262
7263 u8 rate_limit[0x20]; 7263 u8 rate_limit[0x20];
7264
7265 u8 reserved_at_a0[0x160];
7264}; 7266};
7265 7267
7266struct mlx5_ifc_access_register_out_bits { 7268struct mlx5_ifc_access_register_out_bits {
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 01c91d874a57..5bad038ac012 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -67,6 +67,15 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk)
67} 67}
68 68
69/* 69/*
70 * Use this helper if tsk->mm != mm and the victim mm needs a special
71 * handling. This is guaranteed to stay true after once set.
72 */
73static inline bool mm_is_oom_victim(struct mm_struct *mm)
74{
75 return test_bit(MMF_OOM_VICTIM, &mm->flags);
76}
77
78/*
70 * Checks whether a page fault on the given mm is still reliable. 79 * Checks whether a page fault on the given mm is still reliable.
71 * This is no longer true if the oom reaper started to reap the 80 * This is no longer true if the oom reaper started to reap the
72 * address space which is reflected by MMF_UNSTABLE flag set in 81 * address space which is reflected by MMF_UNSTABLE flag set in
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0403894147a3..c170c9250c8b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1674,6 +1674,9 @@ static inline struct pci_dev *pci_get_slot(struct pci_bus *bus,
1674static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus, 1674static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus,
1675 unsigned int devfn) 1675 unsigned int devfn)
1676{ return NULL; } 1676{ return NULL; }
1677static inline struct pci_dev *pci_get_domain_bus_and_slot(int domain,
1678 unsigned int bus, unsigned int devfn)
1679{ return NULL; }
1677 1680
1678static inline int pci_domain_nr(struct pci_bus *bus) { return 0; } 1681static inline int pci_domain_nr(struct pci_bus *bus) { return 0; }
1679static inline struct pci_dev *pci_dev_get(struct pci_dev *dev) { return NULL; } 1682static inline struct pci_dev *pci_dev_get(struct pci_dev *dev) { return NULL; }
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 65d39115f06d..492ed473ba7e 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -765,6 +765,7 @@ extern int pm_generic_poweroff_late(struct device *dev);
765extern int pm_generic_poweroff(struct device *dev); 765extern int pm_generic_poweroff(struct device *dev);
766extern void pm_generic_complete(struct device *dev); 766extern void pm_generic_complete(struct device *dev);
767 767
768extern void dev_pm_skip_next_resume_phases(struct device *dev);
768extern bool dev_pm_smart_suspend_and_suspended(struct device *dev); 769extern bool dev_pm_smart_suspend_and_suspended(struct device *dev);
769 770
770#else /* !CONFIG_PM_SLEEP */ 771#else /* !CONFIG_PM_SLEEP */
diff --git a/include/linux/pti.h b/include/linux/pti.h
index b3ea01a3197e..0174883a935a 100644
--- a/include/linux/pti.h
+++ b/include/linux/pti.h
@@ -1,43 +1,11 @@
1/* 1// SPDX-License-Identifier: GPL-2.0
2 * Copyright (C) Intel 2011 2#ifndef _INCLUDE_PTI_H
3 * 3#define _INCLUDE_PTI_H
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
14 *
15 * The PTI (Parallel Trace Interface) driver directs trace data routed from
16 * various parts in the system out through the Intel Penwell PTI port and
17 * out of the mobile device for analysis with a debugging tool
18 * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
19 * compact JTAG, standard.
20 *
21 * This header file will allow other parts of the OS to use the
22 * interface to write out it's contents for debugging a mobile system.
23 */
24 4
25#ifndef PTI_H_ 5#ifdef CONFIG_PAGE_TABLE_ISOLATION
26#define PTI_H_ 6#include <asm/pti.h>
7#else
8static inline void pti_init(void) { }
9#endif
27 10
28/* offset for last dword of any PTI message. Part of MIPI P1149.7 */ 11#endif
29#define PTI_LASTDWORD_DTS 0x30
30
31/* basic structure used as a write address to the PTI HW */
32struct pti_masterchannel {
33 u8 master;
34 u8 channel;
35};
36
37/* the following functions are defined in misc/pti.c */
38void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count);
39struct pti_masterchannel *pti_request_masterchannel(u8 type,
40 const char *thread_name);
41void pti_release_masterchannel(struct pti_masterchannel *mc);
42
43#endif /*PTI_H_*/
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 37b4bb2545b3..6866df4f31b5 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -101,12 +101,18 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r)
101 101
102/* Note: callers invoking this in a loop must use a compiler barrier, 102/* Note: callers invoking this in a loop must use a compiler barrier,
103 * for example cpu_relax(). Callers must hold producer_lock. 103 * for example cpu_relax(). Callers must hold producer_lock.
104 * Callers are responsible for making sure pointer that is being queued
105 * points to a valid data.
104 */ 106 */
105static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) 107static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
106{ 108{
107 if (unlikely(!r->size) || r->queue[r->producer]) 109 if (unlikely(!r->size) || r->queue[r->producer])
108 return -ENOSPC; 110 return -ENOSPC;
109 111
112 /* Make sure the pointer we are storing points to a valid data. */
113 /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
114 smp_wmb();
115
110 r->queue[r->producer++] = ptr; 116 r->queue[r->producer++] = ptr;
111 if (unlikely(r->producer >= r->size)) 117 if (unlikely(r->producer >= r->size))
112 r->producer = 0; 118 r->producer = 0;
@@ -275,6 +281,9 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r)
275 if (ptr) 281 if (ptr)
276 __ptr_ring_discard_one(r); 282 __ptr_ring_discard_one(r);
277 283
284 /* Make sure anyone accessing data through the pointer is up to date. */
285 /* Pairs with smp_wmb in __ptr_ring_produce. */
286 smp_read_barrier_depends();
278 return ptr; 287 return ptr;
279} 288}
280 289
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index d574361943ea..fcbeed4053ef 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -99,6 +99,8 @@ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
99 struct rb_root *root); 99 struct rb_root *root);
100extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new, 100extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
101 struct rb_root *root); 101 struct rb_root *root);
102extern void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new,
103 struct rb_root_cached *root);
102 104
103static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, 105static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
104 struct rb_node **rb_link) 106 struct rb_node **rb_link)
diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
index cc0072e93e36..857a72ceb794 100644
--- a/include/linux/rwlock_types.h
+++ b/include/linux/rwlock_types.h
@@ -10,9 +10,6 @@
10 */ 10 */
11typedef struct { 11typedef struct {
12 arch_rwlock_t raw_lock; 12 arch_rwlock_t raw_lock;
13#ifdef CONFIG_GENERIC_LOCKBREAK
14 unsigned int break_lock;
15#endif
16#ifdef CONFIG_DEBUG_SPINLOCK 13#ifdef CONFIG_DEBUG_SPINLOCK
17 unsigned int magic, owner_cpu; 14 unsigned int magic, owner_cpu;
18 void *owner; 15 void *owner;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 21991d668d35..d2588263a989 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -849,17 +849,6 @@ struct task_struct {
849 struct held_lock held_locks[MAX_LOCK_DEPTH]; 849 struct held_lock held_locks[MAX_LOCK_DEPTH];
850#endif 850#endif
851 851
852#ifdef CONFIG_LOCKDEP_CROSSRELEASE
853#define MAX_XHLOCKS_NR 64UL
854 struct hist_lock *xhlocks; /* Crossrelease history locks */
855 unsigned int xhlock_idx;
856 /* For restoring at history boundaries */
857 unsigned int xhlock_idx_hist[XHLOCK_CTX_NR];
858 unsigned int hist_id;
859 /* For overwrite check at each context exit */
860 unsigned int hist_id_save[XHLOCK_CTX_NR];
861#endif
862
863#ifdef CONFIG_UBSAN 852#ifdef CONFIG_UBSAN
864 unsigned int in_ubsan; 853 unsigned int in_ubsan;
865#endif 854#endif
@@ -1503,7 +1492,11 @@ static inline void set_task_comm(struct task_struct *tsk, const char *from)
1503 __set_task_comm(tsk, from, false); 1492 __set_task_comm(tsk, from, false);
1504} 1493}
1505 1494
1506extern char *get_task_comm(char *to, struct task_struct *tsk); 1495extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
1496#define get_task_comm(buf, tsk) ({ \
1497 BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN); \
1498 __get_task_comm(buf, sizeof(buf), tsk); \
1499})
1507 1500
1508#ifdef CONFIG_SMP 1501#ifdef CONFIG_SMP
1509void scheduler_ipi(void); 1502void scheduler_ipi(void);
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 9c8847395b5e..ec912d01126f 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -70,6 +70,7 @@ static inline int get_dumpable(struct mm_struct *mm)
70#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ 70#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */
71#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ 71#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */
72#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ 72#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */
73#define MMF_OOM_VICTIM 25 /* mm is the oom victim */
73#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) 74#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
74 75
75#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ 76#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 7b2170bfd6e7..bc6bb325d1bf 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -126,7 +126,7 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
126 * for that name. This appears in the sysfs "modalias" attribute 126 * for that name. This appears in the sysfs "modalias" attribute
127 * for driver coldplugging, and in uevents used for hotplugging 127 * for driver coldplugging, and in uevents used for hotplugging
128 * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when 128 * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when
129 * when not using a GPIO line) 129 * not using a GPIO line)
130 * 130 *
131 * @statistics: statistics for the spi_device 131 * @statistics: statistics for the spi_device
132 * 132 *
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index a39186194cd6..3bf273538840 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -107,16 +107,11 @@ do { \
107 107
108#define raw_spin_is_locked(lock) arch_spin_is_locked(&(lock)->raw_lock) 108#define raw_spin_is_locked(lock) arch_spin_is_locked(&(lock)->raw_lock)
109 109
110#ifdef CONFIG_GENERIC_LOCKBREAK
111#define raw_spin_is_contended(lock) ((lock)->break_lock)
112#else
113
114#ifdef arch_spin_is_contended 110#ifdef arch_spin_is_contended
115#define raw_spin_is_contended(lock) arch_spin_is_contended(&(lock)->raw_lock) 111#define raw_spin_is_contended(lock) arch_spin_is_contended(&(lock)->raw_lock)
116#else 112#else
117#define raw_spin_is_contended(lock) (((void)(lock), 0)) 113#define raw_spin_is_contended(lock) (((void)(lock), 0))
118#endif /*arch_spin_is_contended*/ 114#endif /*arch_spin_is_contended*/
119#endif
120 115
121/* 116/*
122 * This barrier must provide two things: 117 * This barrier must provide two things:
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 73548eb13a5d..24b4e6f2c1a2 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -19,9 +19,6 @@
19 19
20typedef struct raw_spinlock { 20typedef struct raw_spinlock {
21 arch_spinlock_t raw_lock; 21 arch_spinlock_t raw_lock;
22#ifdef CONFIG_GENERIC_LOCKBREAK
23 unsigned int break_lock;
24#endif
25#ifdef CONFIG_DEBUG_SPINLOCK 22#ifdef CONFIG_DEBUG_SPINLOCK
26 unsigned int magic, owner_cpu; 23 unsigned int magic, owner_cpu;
27 void *owner; 24 void *owner;
diff --git a/include/linux/string.h b/include/linux/string.h
index 410ecf17de3c..cfd83eb2f926 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -259,7 +259,10 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p)
259{ 259{
260 __kernel_size_t ret; 260 __kernel_size_t ret;
261 size_t p_size = __builtin_object_size(p, 0); 261 size_t p_size = __builtin_object_size(p, 0);
262 if (p_size == (size_t)-1) 262
263 /* Work around gcc excess stack consumption issue */
264 if (p_size == (size_t)-1 ||
265 (__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0'))
263 return __builtin_strlen(p); 266 return __builtin_strlen(p);
264 ret = strnlen(p, p_size); 267 ret = strnlen(p, p_size);
265 if (p_size <= ret) 268 if (p_size <= ret)
diff --git a/include/linux/tick.h b/include/linux/tick.h
index f442d1a42025..7cc35921218e 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -119,6 +119,7 @@ extern void tick_nohz_idle_exit(void);
119extern void tick_nohz_irq_exit(void); 119extern void tick_nohz_irq_exit(void);
120extern ktime_t tick_nohz_get_sleep_length(void); 120extern ktime_t tick_nohz_get_sleep_length(void);
121extern unsigned long tick_nohz_get_idle_calls(void); 121extern unsigned long tick_nohz_get_idle_calls(void);
122extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
122extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); 123extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
123extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); 124extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
124#else /* !CONFIG_NO_HZ_COMMON */ 125#else /* !CONFIG_NO_HZ_COMMON */
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 04af640ea95b..2448f9cc48a3 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -207,9 +207,11 @@ unsigned long round_jiffies_up(unsigned long j);
207unsigned long round_jiffies_up_relative(unsigned long j); 207unsigned long round_jiffies_up_relative(unsigned long j);
208 208
209#ifdef CONFIG_HOTPLUG_CPU 209#ifdef CONFIG_HOTPLUG_CPU
210int timers_prepare_cpu(unsigned int cpu);
210int timers_dead_cpu(unsigned int cpu); 211int timers_dead_cpu(unsigned int cpu);
211#else 212#else
212#define timers_dead_cpu NULL 213#define timers_prepare_cpu NULL
214#define timers_dead_cpu NULL
213#endif 215#endif
214 216
215#endif 217#endif
diff --git a/include/linux/trace.h b/include/linux/trace.h
index d24991c1fef3..b95ffb2188ab 100644
--- a/include/linux/trace.h
+++ b/include/linux/trace.h
@@ -18,7 +18,7 @@
18 */ 18 */
19struct trace_export { 19struct trace_export {
20 struct trace_export __rcu *next; 20 struct trace_export __rcu *next;
21 void (*write)(const void *, unsigned int); 21 void (*write)(struct trace_export *, const void *, unsigned int);
22}; 22};
23 23
24int register_ftrace_export(struct trace_export *export); 24int register_ftrace_export(struct trace_export *export);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8b8118a7fadb..cb4d92b79cd9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3226,7 +3226,6 @@ struct cfg80211_ops {
3226 * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. 3226 * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN.
3227 * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing 3227 * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing
3228 * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH. 3228 * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH.
3229 * @WIPHY_FLAG_SUPPORTS_SCHED_SCAN: The device supports scheduled scans.
3230 * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the 3229 * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the
3231 * firmware. 3230 * firmware.
3232 * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP. 3231 * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP.
diff --git a/include/net/gue.h b/include/net/gue.h
index 2fdb29ca74c2..fdad41469b65 100644
--- a/include/net/gue.h
+++ b/include/net/gue.h
@@ -44,10 +44,10 @@ struct guehdr {
44#else 44#else
45#error "Please fix <asm/byteorder.h>" 45#error "Please fix <asm/byteorder.h>"
46#endif 46#endif
47 __u8 proto_ctype; 47 __u8 proto_ctype;
48 __u16 flags; 48 __be16 flags;
49 }; 49 };
50 __u32 word; 50 __be32 word;
51 }; 51 };
52}; 52};
53 53
@@ -84,11 +84,10 @@ static inline size_t guehdr_priv_flags_len(__be32 flags)
84 * if there is an unknown standard or private flags, or the options length for 84 * if there is an unknown standard or private flags, or the options length for
85 * the flags exceeds the options length specific in hlen of the GUE header. 85 * the flags exceeds the options length specific in hlen of the GUE header.
86 */ 86 */
87static inline int validate_gue_flags(struct guehdr *guehdr, 87static inline int validate_gue_flags(struct guehdr *guehdr, size_t optlen)
88 size_t optlen)
89{ 88{
89 __be16 flags = guehdr->flags;
90 size_t len; 90 size_t len;
91 __be32 flags = guehdr->flags;
92 91
93 if (flags & ~GUE_FLAGS_ALL) 92 if (flags & ~GUE_FLAGS_ALL)
94 return 1; 93 return 1;
@@ -101,12 +100,13 @@ static inline int validate_gue_flags(struct guehdr *guehdr,
101 /* Private flags are last four bytes accounted in 100 /* Private flags are last four bytes accounted in
102 * guehdr_flags_len 101 * guehdr_flags_len
103 */ 102 */
104 flags = *(__be32 *)((void *)&guehdr[1] + len - GUE_LEN_PRIV); 103 __be32 pflags = *(__be32 *)((void *)&guehdr[1] +
104 len - GUE_LEN_PRIV);
105 105
106 if (flags & ~GUE_PFLAGS_ALL) 106 if (pflags & ~GUE_PFLAGS_ALL)
107 return 1; 107 return 1;
108 108
109 len += guehdr_priv_flags_len(flags); 109 len += guehdr_priv_flags_len(pflags);
110 if (len > optlen) 110 if (len > optlen)
111 return 1; 111 return 1;
112 } 112 }
diff --git a/include/net/ip.h b/include/net/ip.h
index 9896f46cbbf1..af8addbaa3c1 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -34,6 +34,7 @@
34#include <net/flow_dissector.h> 34#include <net/flow_dissector.h>
35 35
36#define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ 36#define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */
37#define IPV4_MIN_MTU 68 /* RFC 791 */
37 38
38struct sock; 39struct sock;
39 40
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 0105445cab83..8e08b6da72f3 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -694,9 +694,7 @@ struct tc_cls_matchall_offload {
694}; 694};
695 695
696enum tc_clsbpf_command { 696enum tc_clsbpf_command {
697 TC_CLSBPF_ADD, 697 TC_CLSBPF_OFFLOAD,
698 TC_CLSBPF_REPLACE,
699 TC_CLSBPF_DESTROY,
700 TC_CLSBPF_STATS, 698 TC_CLSBPF_STATS,
701}; 699};
702 700
@@ -705,6 +703,7 @@ struct tc_cls_bpf_offload {
705 enum tc_clsbpf_command command; 703 enum tc_clsbpf_command command;
706 struct tcf_exts *exts; 704 struct tcf_exts *exts;
707 struct bpf_prog *prog; 705 struct bpf_prog *prog;
706 struct bpf_prog *oldprog;
708 const char *name; 707 const char *name;
709 bool exts_integrated; 708 bool exts_integrated;
710 u32 gen_flags; 709 u32 gen_flags;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 65d0d25f2648..83a3e47d5845 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -71,6 +71,7 @@ struct Qdisc {
71 * qdisc_tree_decrease_qlen() should stop. 71 * qdisc_tree_decrease_qlen() should stop.
72 */ 72 */
73#define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */ 73#define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */
74#define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */
74 u32 limit; 75 u32 limit;
75 const struct Qdisc_ops *ops; 76 const struct Qdisc_ops *ops;
76 struct qdisc_size_table __rcu *stab; 77 struct qdisc_size_table __rcu *stab;
diff --git a/include/net/sock.h b/include/net/sock.h
index 9155da422692..7a7b14e9628a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1514,6 +1514,11 @@ static inline bool sock_owned_by_user(const struct sock *sk)
1514 return sk->sk_lock.owned; 1514 return sk->sk_lock.owned;
1515} 1515}
1516 1516
1517static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
1518{
1519 return sk->sk_lock.owned;
1520}
1521
1517/* no reclassification while locks are held */ 1522/* no reclassification while locks are held */
1518static inline bool sock_allow_reclassification(const struct sock *csk) 1523static inline bool sock_allow_reclassification(const struct sock *csk)
1519{ 1524{
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index dc28a98ce97c..ae35991b5877 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1570,6 +1570,9 @@ int xfrm_init_state(struct xfrm_state *x);
1570int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); 1570int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
1571int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); 1571int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
1572int xfrm_input_resume(struct sk_buff *skb, int nexthdr); 1572int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
1573int xfrm_trans_queue(struct sk_buff *skb,
1574 int (*finish)(struct net *, struct sock *,
1575 struct sk_buff *));
1573int xfrm_output_resume(struct sk_buff *skb, int err); 1576int xfrm_output_resume(struct sk_buff *skb, int err);
1574int xfrm_output(struct sock *sk, struct sk_buff *skb); 1577int xfrm_output(struct sock *sk, struct sk_buff *skb);
1575int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); 1578int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
diff --git a/include/trace/events/clk.h b/include/trace/events/clk.h
index 758607226bfd..2cd449328aee 100644
--- a/include/trace/events/clk.h
+++ b/include/trace/events/clk.h
@@ -134,12 +134,12 @@ DECLARE_EVENT_CLASS(clk_parent,
134 134
135 TP_STRUCT__entry( 135 TP_STRUCT__entry(
136 __string( name, core->name ) 136 __string( name, core->name )
137 __string( pname, parent->name ) 137 __string( pname, parent ? parent->name : "none" )
138 ), 138 ),
139 139
140 TP_fast_assign( 140 TP_fast_assign(
141 __assign_str(name, core->name); 141 __assign_str(name, core->name);
142 __assign_str(pname, parent->name); 142 __assign_str(pname, parent ? parent->name : "none");
143 ), 143 ),
144 144
145 TP_printk("%s %s", __get_str(name), __get_str(pname)) 145 TP_printk("%s %s", __get_str(name), __get_str(pname))
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index e4b0b8e09932..2c735a3e6613 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -211,7 +211,7 @@ TRACE_EVENT(kvm_ack_irq,
211 { KVM_TRACE_MMIO_WRITE, "write" } 211 { KVM_TRACE_MMIO_WRITE, "write" }
212 212
213TRACE_EVENT(kvm_mmio, 213TRACE_EVENT(kvm_mmio,
214 TP_PROTO(int type, int len, u64 gpa, u64 val), 214 TP_PROTO(int type, int len, u64 gpa, void *val),
215 TP_ARGS(type, len, gpa, val), 215 TP_ARGS(type, len, gpa, val),
216 216
217 TP_STRUCT__entry( 217 TP_STRUCT__entry(
@@ -225,7 +225,10 @@ TRACE_EVENT(kvm_mmio,
225 __entry->type = type; 225 __entry->type = type;
226 __entry->len = len; 226 __entry->len = len;
227 __entry->gpa = gpa; 227 __entry->gpa = gpa;
228 __entry->val = val; 228 __entry->val = 0;
229 if (val)
230 memcpy(&__entry->val, val,
231 min_t(u32, sizeof(__entry->val), len));
229 ), 232 ),
230 233
231 TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", 234 TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h
index f5024c560d8f..9c4eb33c5a1d 100644
--- a/include/trace/events/preemptirq.h
+++ b/include/trace/events/preemptirq.h
@@ -56,15 +56,18 @@ DEFINE_EVENT(preemptirq_template, preempt_enable,
56 56
57#include <trace/define_trace.h> 57#include <trace/define_trace.h>
58 58
59#else /* !CONFIG_PREEMPTIRQ_EVENTS */ 59#endif /* !CONFIG_PREEMPTIRQ_EVENTS */
60 60
61#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || defined(CONFIG_PROVE_LOCKING)
61#define trace_irq_enable(...) 62#define trace_irq_enable(...)
62#define trace_irq_disable(...) 63#define trace_irq_disable(...)
63#define trace_preempt_enable(...)
64#define trace_preempt_disable(...)
65#define trace_irq_enable_rcuidle(...) 64#define trace_irq_enable_rcuidle(...)
66#define trace_irq_disable_rcuidle(...) 65#define trace_irq_disable_rcuidle(...)
66#endif
67
68#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || !defined(CONFIG_DEBUG_PREEMPT)
69#define trace_preempt_enable(...)
70#define trace_preempt_disable(...)
67#define trace_preempt_enable_rcuidle(...) 71#define trace_preempt_enable_rcuidle(...)
68#define trace_preempt_disable_rcuidle(...) 72#define trace_preempt_disable_rcuidle(...)
69
70#endif 73#endif
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 07cccca6cbf1..ab34c561f26b 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -25,6 +25,35 @@
25 tcp_state_name(TCP_CLOSING), \ 25 tcp_state_name(TCP_CLOSING), \
26 tcp_state_name(TCP_NEW_SYN_RECV)) 26 tcp_state_name(TCP_NEW_SYN_RECV))
27 27
28#define TP_STORE_V4MAPPED(__entry, saddr, daddr) \
29 do { \
30 struct in6_addr *pin6; \
31 \
32 pin6 = (struct in6_addr *)__entry->saddr_v6; \
33 ipv6_addr_set_v4mapped(saddr, pin6); \
34 pin6 = (struct in6_addr *)__entry->daddr_v6; \
35 ipv6_addr_set_v4mapped(daddr, pin6); \
36 } while (0)
37
38#if IS_ENABLED(CONFIG_IPV6)
39#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6) \
40 do { \
41 if (sk->sk_family == AF_INET6) { \
42 struct in6_addr *pin6; \
43 \
44 pin6 = (struct in6_addr *)__entry->saddr_v6; \
45 *pin6 = saddr6; \
46 pin6 = (struct in6_addr *)__entry->daddr_v6; \
47 *pin6 = daddr6; \
48 } else { \
49 TP_STORE_V4MAPPED(__entry, saddr, daddr); \
50 } \
51 } while (0)
52#else
53#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6) \
54 TP_STORE_V4MAPPED(__entry, saddr, daddr)
55#endif
56
28/* 57/*
29 * tcp event with arguments sk and skb 58 * tcp event with arguments sk and skb
30 * 59 *
@@ -50,7 +79,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
50 79
51 TP_fast_assign( 80 TP_fast_assign(
52 struct inet_sock *inet = inet_sk(sk); 81 struct inet_sock *inet = inet_sk(sk);
53 struct in6_addr *pin6;
54 __be32 *p32; 82 __be32 *p32;
55 83
56 __entry->skbaddr = skb; 84 __entry->skbaddr = skb;
@@ -65,20 +93,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
65 p32 = (__be32 *) __entry->daddr; 93 p32 = (__be32 *) __entry->daddr;
66 *p32 = inet->inet_daddr; 94 *p32 = inet->inet_daddr;
67 95
68#if IS_ENABLED(CONFIG_IPV6) 96 TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
69 if (sk->sk_family == AF_INET6) { 97 sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
70 pin6 = (struct in6_addr *)__entry->saddr_v6;
71 *pin6 = sk->sk_v6_rcv_saddr;
72 pin6 = (struct in6_addr *)__entry->daddr_v6;
73 *pin6 = sk->sk_v6_daddr;
74 } else
75#endif
76 {
77 pin6 = (struct in6_addr *)__entry->saddr_v6;
78 ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
79 pin6 = (struct in6_addr *)__entry->daddr_v6;
80 ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
81 }
82 ), 98 ),
83 99
84 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", 100 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
@@ -127,7 +143,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
127 143
128 TP_fast_assign( 144 TP_fast_assign(
129 struct inet_sock *inet = inet_sk(sk); 145 struct inet_sock *inet = inet_sk(sk);
130 struct in6_addr *pin6;
131 __be32 *p32; 146 __be32 *p32;
132 147
133 __entry->skaddr = sk; 148 __entry->skaddr = sk;
@@ -141,20 +156,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
141 p32 = (__be32 *) __entry->daddr; 156 p32 = (__be32 *) __entry->daddr;
142 *p32 = inet->inet_daddr; 157 *p32 = inet->inet_daddr;
143 158
144#if IS_ENABLED(CONFIG_IPV6) 159 TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
145 if (sk->sk_family == AF_INET6) { 160 sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
146 pin6 = (struct in6_addr *)__entry->saddr_v6;
147 *pin6 = sk->sk_v6_rcv_saddr;
148 pin6 = (struct in6_addr *)__entry->daddr_v6;
149 *pin6 = sk->sk_v6_daddr;
150 } else
151#endif
152 {
153 pin6 = (struct in6_addr *)__entry->saddr_v6;
154 ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
155 pin6 = (struct in6_addr *)__entry->daddr_v6;
156 ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
157 }
158 ), 161 ),
159 162
160 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", 163 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
@@ -197,7 +200,6 @@ TRACE_EVENT(tcp_set_state,
197 200
198 TP_fast_assign( 201 TP_fast_assign(
199 struct inet_sock *inet = inet_sk(sk); 202 struct inet_sock *inet = inet_sk(sk);
200 struct in6_addr *pin6;
201 __be32 *p32; 203 __be32 *p32;
202 204
203 __entry->skaddr = sk; 205 __entry->skaddr = sk;
@@ -213,20 +215,8 @@ TRACE_EVENT(tcp_set_state,
213 p32 = (__be32 *) __entry->daddr; 215 p32 = (__be32 *) __entry->daddr;
214 *p32 = inet->inet_daddr; 216 *p32 = inet->inet_daddr;
215 217
216#if IS_ENABLED(CONFIG_IPV6) 218 TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
217 if (sk->sk_family == AF_INET6) { 219 sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
218 pin6 = (struct in6_addr *)__entry->saddr_v6;
219 *pin6 = sk->sk_v6_rcv_saddr;
220 pin6 = (struct in6_addr *)__entry->daddr_v6;
221 *pin6 = sk->sk_v6_daddr;
222 } else
223#endif
224 {
225 pin6 = (struct in6_addr *)__entry->saddr_v6;
226 ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
227 pin6 = (struct in6_addr *)__entry->daddr_v6;
228 ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
229 }
230 ), 220 ),
231 221
232 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s", 222 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
@@ -256,7 +246,6 @@ TRACE_EVENT(tcp_retransmit_synack,
256 246
257 TP_fast_assign( 247 TP_fast_assign(
258 struct inet_request_sock *ireq = inet_rsk(req); 248 struct inet_request_sock *ireq = inet_rsk(req);
259 struct in6_addr *pin6;
260 __be32 *p32; 249 __be32 *p32;
261 250
262 __entry->skaddr = sk; 251 __entry->skaddr = sk;
@@ -271,20 +260,8 @@ TRACE_EVENT(tcp_retransmit_synack,
271 p32 = (__be32 *) __entry->daddr; 260 p32 = (__be32 *) __entry->daddr;
272 *p32 = ireq->ir_rmt_addr; 261 *p32 = ireq->ir_rmt_addr;
273 262
274#if IS_ENABLED(CONFIG_IPV6) 263 TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr,
275 if (sk->sk_family == AF_INET6) { 264 ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr);
276 pin6 = (struct in6_addr *)__entry->saddr_v6;
277 *pin6 = ireq->ir_v6_loc_addr;
278 pin6 = (struct in6_addr *)__entry->daddr_v6;
279 *pin6 = ireq->ir_v6_rmt_addr;
280 } else
281#endif
282 {
283 pin6 = (struct in6_addr *)__entry->saddr_v6;
284 ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6);
285 pin6 = (struct in6_addr *)__entry->daddr_v6;
286 ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6);
287 }
288 ), 265 ),
289 266
290 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", 267 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index af3cc2f4e1ad..37b5096ae97b 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -256,7 +256,6 @@ struct tc_red_qopt {
256#define TC_RED_ECN 1 256#define TC_RED_ECN 1
257#define TC_RED_HARDDROP 2 257#define TC_RED_HARDDROP 2
258#define TC_RED_ADAPTATIVE 4 258#define TC_RED_ADAPTATIVE 4
259#define TC_RED_OFFLOADED 8
260}; 259};
261 260
262struct tc_red_xstats { 261struct tc_red_xstats {
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index d8b5f80c2ea6..843e29aa3cac 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -557,6 +557,7 @@ enum {
557 TCA_PAD, 557 TCA_PAD,
558 TCA_DUMP_INVISIBLE, 558 TCA_DUMP_INVISIBLE,
559 TCA_CHAIN, 559 TCA_CHAIN,
560 TCA_HW_OFFLOAD,
560 __TCA_MAX 561 __TCA_MAX
561}; 562};
562 563
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index 4914b93a23f2..61f410fd74e4 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -44,3 +44,8 @@ static inline void xen_balloon_init(void)
44{ 44{
45} 45}
46#endif 46#endif
47
48#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
49struct resource;
50void arch_xen_balloon_init(struct resource *hostmem_resource);
51#endif
diff --git a/init/Kconfig b/init/Kconfig
index 2934249fba46..690a381adee0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -461,10 +461,14 @@ endmenu # "CPU/Task time and stats accounting"
461 461
462config CPU_ISOLATION 462config CPU_ISOLATION
463 bool "CPU isolation" 463 bool "CPU isolation"
464 default y
464 help 465 help
465 Make sure that CPUs running critical tasks are not disturbed by 466 Make sure that CPUs running critical tasks are not disturbed by
466 any source of "noise" such as unbound workqueues, timers, kthreads... 467 any source of "noise" such as unbound workqueues, timers, kthreads...
467 Unbound jobs get offloaded to housekeeping CPUs. 468 Unbound jobs get offloaded to housekeeping CPUs. This is driven by
469 the "isolcpus=" boot parameter.
470
471 Say Y if unsure.
468 472
469source "kernel/rcu/Kconfig" 473source "kernel/rcu/Kconfig"
470 474
diff --git a/init/main.c b/init/main.c
index dfec3809e740..a8100b954839 100644
--- a/init/main.c
+++ b/init/main.c
@@ -75,6 +75,7 @@
75#include <linux/slab.h> 75#include <linux/slab.h>
76#include <linux/perf_event.h> 76#include <linux/perf_event.h>
77#include <linux/ptrace.h> 77#include <linux/ptrace.h>
78#include <linux/pti.h>
78#include <linux/blkdev.h> 79#include <linux/blkdev.h>
79#include <linux/elevator.h> 80#include <linux/elevator.h>
80#include <linux/sched_clock.h> 81#include <linux/sched_clock.h>
@@ -504,6 +505,10 @@ static void __init mm_init(void)
504 pgtable_init(); 505 pgtable_init();
505 vmalloc_init(); 506 vmalloc_init();
506 ioremap_huge_init(); 507 ioremap_huge_init();
508 /* Should be run before the first non-init thread is created */
509 init_espfix_bsp();
510 /* Should be run after espfix64 is set up. */
511 pti_init();
507} 512}
508 513
509asmlinkage __visible void __init start_kernel(void) 514asmlinkage __visible void __init start_kernel(void)
@@ -589,6 +594,12 @@ asmlinkage __visible void __init start_kernel(void)
589 radix_tree_init(); 594 radix_tree_init();
590 595
591 /* 596 /*
597 * Set up housekeeping before setting up workqueues to allow the unbound
598 * workqueue to take non-housekeeping into account.
599 */
600 housekeeping_init();
601
602 /*
592 * Allow workqueue creation and work item queueing/cancelling 603 * Allow workqueue creation and work item queueing/cancelling
593 * early. Work item execution depends on kthreads and starts after 604 * early. Work item execution depends on kthreads and starts after
594 * workqueue_init(). 605 * workqueue_init().
@@ -605,7 +616,6 @@ asmlinkage __visible void __init start_kernel(void)
605 early_irq_init(); 616 early_irq_init();
606 init_IRQ(); 617 init_IRQ();
607 tick_init(); 618 tick_init();
608 housekeeping_init();
609 rcu_init_nohz(); 619 rcu_init_nohz();
610 init_timers(); 620 init_timers();
611 hrtimers_init(); 621 hrtimers_init();
@@ -674,10 +684,6 @@ asmlinkage __visible void __init start_kernel(void)
674 if (efi_enabled(EFI_RUNTIME_SERVICES)) 684 if (efi_enabled(EFI_RUNTIME_SERVICES))
675 efi_enter_virtual_mode(); 685 efi_enter_virtual_mode();
676#endif 686#endif
677#ifdef CONFIG_X86_ESPFIX64
678 /* Should be run before the first non-init thread is created */
679 init_espfix_bsp();
680#endif
681 thread_stack_cache_init(); 687 thread_stack_cache_init();
682 cred_init(); 688 cred_init();
683 fork_init(); 689 fork_init();
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index e469e05c8e83..3905d4bc5b80 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -114,6 +114,7 @@ static void htab_free_elems(struct bpf_htab *htab)
114 pptr = htab_elem_get_ptr(get_htab_elem(htab, i), 114 pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
115 htab->map.key_size); 115 htab->map.key_size);
116 free_percpu(pptr); 116 free_percpu(pptr);
117 cond_resched();
117 } 118 }
118free_elems: 119free_elems:
119 bpf_map_area_free(htab->elems); 120 bpf_map_area_free(htab->elems);
@@ -159,6 +160,7 @@ static int prealloc_init(struct bpf_htab *htab)
159 goto free_elems; 160 goto free_elems;
160 htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, 161 htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
161 pptr); 162 pptr);
163 cond_resched();
162 } 164 }
163 165
164skip_percpu_elems: 166skip_percpu_elems:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d4593571c404..04b24876cd23 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1059,6 +1059,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
1059 break; 1059 break;
1060 case PTR_TO_STACK: 1060 case PTR_TO_STACK:
1061 pointer_desc = "stack "; 1061 pointer_desc = "stack ";
1062 /* The stack spill tracking logic in check_stack_write()
1063 * and check_stack_read() relies on stack accesses being
1064 * aligned.
1065 */
1066 strict = true;
1062 break; 1067 break;
1063 default: 1068 default:
1064 break; 1069 break;
@@ -1067,6 +1072,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
1067 strict); 1072 strict);
1068} 1073}
1069 1074
1075/* truncate register to smaller size (in bytes)
1076 * must be called with size < BPF_REG_SIZE
1077 */
1078static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1079{
1080 u64 mask;
1081
1082 /* clear high bits in bit representation */
1083 reg->var_off = tnum_cast(reg->var_off, size);
1084
1085 /* fix arithmetic bounds */
1086 mask = ((u64)1 << (size * 8)) - 1;
1087 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1088 reg->umin_value &= mask;
1089 reg->umax_value &= mask;
1090 } else {
1091 reg->umin_value = 0;
1092 reg->umax_value = mask;
1093 }
1094 reg->smin_value = reg->umin_value;
1095 reg->smax_value = reg->umax_value;
1096}
1097
1070/* check whether memory at (regno + off) is accessible for t = (read | write) 1098/* check whether memory at (regno + off) is accessible for t = (read | write)
1071 * if t==write, value_regno is a register which value is stored into memory 1099 * if t==write, value_regno is a register which value is stored into memory
1072 * if t==read, value_regno is a register which will receive the value from memory 1100 * if t==read, value_regno is a register which will receive the value from memory
@@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
1200 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && 1228 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
1201 regs[value_regno].type == SCALAR_VALUE) { 1229 regs[value_regno].type == SCALAR_VALUE) {
1202 /* b/h/w load zero-extends, mark upper bits as known 0 */ 1230 /* b/h/w load zero-extends, mark upper bits as known 0 */
1203 regs[value_regno].var_off = 1231 coerce_reg_to_size(&regs[value_regno], size);
1204 tnum_cast(regs[value_regno].var_off, size);
1205 __update_reg_bounds(&regs[value_regno]);
1206 } 1232 }
1207 return err; 1233 return err;
1208} 1234}
@@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1282 tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off); 1308 tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
1283 verbose(env, "invalid variable stack read R%d var_off=%s\n", 1309 verbose(env, "invalid variable stack read R%d var_off=%s\n",
1284 regno, tn_buf); 1310 regno, tn_buf);
1311 return -EACCES;
1285 } 1312 }
1286 off = regs[regno].off + regs[regno].var_off.value; 1313 off = regs[regno].off + regs[regno].var_off.value;
1287 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || 1314 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
@@ -1674,7 +1701,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
1674 return -EINVAL; 1701 return -EINVAL;
1675 } 1702 }
1676 1703
1704 /* With LD_ABS/IND some JITs save/restore skb from r1. */
1677 changes_data = bpf_helper_changes_pkt_data(fn->func); 1705 changes_data = bpf_helper_changes_pkt_data(fn->func);
1706 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
1707 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
1708 func_id_name(func_id), func_id);
1709 return -EINVAL;
1710 }
1678 1711
1679 memset(&meta, 0, sizeof(meta)); 1712 memset(&meta, 0, sizeof(meta));
1680 meta.pkt_access = fn->pkt_access; 1713 meta.pkt_access = fn->pkt_access;
@@ -1766,14 +1799,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
1766 return 0; 1799 return 0;
1767} 1800}
1768 1801
1769static void coerce_reg_to_32(struct bpf_reg_state *reg)
1770{
1771 /* clear high 32 bits */
1772 reg->var_off = tnum_cast(reg->var_off, 4);
1773 /* Update bounds */
1774 __update_reg_bounds(reg);
1775}
1776
1777static bool signed_add_overflows(s64 a, s64 b) 1802static bool signed_add_overflows(s64 a, s64 b)
1778{ 1803{
1779 /* Do the add in u64, where overflow is well-defined */ 1804 /* Do the add in u64, where overflow is well-defined */
@@ -1794,6 +1819,41 @@ static bool signed_sub_overflows(s64 a, s64 b)
1794 return res > a; 1819 return res > a;
1795} 1820}
1796 1821
1822static bool check_reg_sane_offset(struct bpf_verifier_env *env,
1823 const struct bpf_reg_state *reg,
1824 enum bpf_reg_type type)
1825{
1826 bool known = tnum_is_const(reg->var_off);
1827 s64 val = reg->var_off.value;
1828 s64 smin = reg->smin_value;
1829
1830 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
1831 verbose(env, "math between %s pointer and %lld is not allowed\n",
1832 reg_type_str[type], val);
1833 return false;
1834 }
1835
1836 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
1837 verbose(env, "%s pointer offset %d is not allowed\n",
1838 reg_type_str[type], reg->off);
1839 return false;
1840 }
1841
1842 if (smin == S64_MIN) {
1843 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
1844 reg_type_str[type]);
1845 return false;
1846 }
1847
1848 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
1849 verbose(env, "value %lld makes %s pointer be out of bounds\n",
1850 smin, reg_type_str[type]);
1851 return false;
1852 }
1853
1854 return true;
1855}
1856
1797/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. 1857/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
1798 * Caller should also handle BPF_MOV case separately. 1858 * Caller should also handle BPF_MOV case separately.
1799 * If we return -EACCES, caller may want to try again treating pointer as a 1859 * If we return -EACCES, caller may want to try again treating pointer as a
@@ -1830,29 +1890,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1830 1890
1831 if (BPF_CLASS(insn->code) != BPF_ALU64) { 1891 if (BPF_CLASS(insn->code) != BPF_ALU64) {
1832 /* 32-bit ALU ops on pointers produce (meaningless) scalars */ 1892 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
1833 if (!env->allow_ptr_leaks) 1893 verbose(env,
1834 verbose(env, 1894 "R%d 32-bit pointer arithmetic prohibited\n",
1835 "R%d 32-bit pointer arithmetic prohibited\n", 1895 dst);
1836 dst);
1837 return -EACCES; 1896 return -EACCES;
1838 } 1897 }
1839 1898
1840 if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { 1899 if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
1841 if (!env->allow_ptr_leaks) 1900 verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
1842 verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", 1901 dst);
1843 dst);
1844 return -EACCES; 1902 return -EACCES;
1845 } 1903 }
1846 if (ptr_reg->type == CONST_PTR_TO_MAP) { 1904 if (ptr_reg->type == CONST_PTR_TO_MAP) {
1847 if (!env->allow_ptr_leaks) 1905 verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
1848 verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", 1906 dst);
1849 dst);
1850 return -EACCES; 1907 return -EACCES;
1851 } 1908 }
1852 if (ptr_reg->type == PTR_TO_PACKET_END) { 1909 if (ptr_reg->type == PTR_TO_PACKET_END) {
1853 if (!env->allow_ptr_leaks) 1910 verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
1854 verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", 1911 dst);
1855 dst);
1856 return -EACCES; 1912 return -EACCES;
1857 } 1913 }
1858 1914
@@ -1862,6 +1918,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1862 dst_reg->type = ptr_reg->type; 1918 dst_reg->type = ptr_reg->type;
1863 dst_reg->id = ptr_reg->id; 1919 dst_reg->id = ptr_reg->id;
1864 1920
1921 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
1922 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
1923 return -EINVAL;
1924
1865 switch (opcode) { 1925 switch (opcode) {
1866 case BPF_ADD: 1926 case BPF_ADD:
1867 /* We can take a fixed offset as long as it doesn't overflow 1927 /* We can take a fixed offset as long as it doesn't overflow
@@ -1915,9 +1975,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1915 case BPF_SUB: 1975 case BPF_SUB:
1916 if (dst_reg == off_reg) { 1976 if (dst_reg == off_reg) {
1917 /* scalar -= pointer. Creates an unknown scalar */ 1977 /* scalar -= pointer. Creates an unknown scalar */
1918 if (!env->allow_ptr_leaks) 1978 verbose(env, "R%d tried to subtract pointer from scalar\n",
1919 verbose(env, "R%d tried to subtract pointer from scalar\n", 1979 dst);
1920 dst);
1921 return -EACCES; 1980 return -EACCES;
1922 } 1981 }
1923 /* We don't allow subtraction from FP, because (according to 1982 /* We don't allow subtraction from FP, because (according to
@@ -1925,9 +1984,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1925 * be able to deal with it. 1984 * be able to deal with it.
1926 */ 1985 */
1927 if (ptr_reg->type == PTR_TO_STACK) { 1986 if (ptr_reg->type == PTR_TO_STACK) {
1928 if (!env->allow_ptr_leaks) 1987 verbose(env, "R%d subtraction from stack pointer prohibited\n",
1929 verbose(env, "R%d subtraction from stack pointer prohibited\n", 1988 dst);
1930 dst);
1931 return -EACCES; 1989 return -EACCES;
1932 } 1990 }
1933 if (known && (ptr_reg->off - smin_val == 1991 if (known && (ptr_reg->off - smin_val ==
@@ -1976,28 +2034,30 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1976 case BPF_AND: 2034 case BPF_AND:
1977 case BPF_OR: 2035 case BPF_OR:
1978 case BPF_XOR: 2036 case BPF_XOR:
1979 /* bitwise ops on pointers are troublesome, prohibit for now. 2037 /* bitwise ops on pointers are troublesome, prohibit. */
1980 * (However, in principle we could allow some cases, e.g. 2038 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
1981 * ptr &= ~3 which would reduce min_value by 3.) 2039 dst, bpf_alu_string[opcode >> 4]);
1982 */
1983 if (!env->allow_ptr_leaks)
1984 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
1985 dst, bpf_alu_string[opcode >> 4]);
1986 return -EACCES; 2040 return -EACCES;
1987 default: 2041 default:
1988 /* other operators (e.g. MUL,LSH) produce non-pointer results */ 2042 /* other operators (e.g. MUL,LSH) produce non-pointer results */
1989 if (!env->allow_ptr_leaks) 2043 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
1990 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", 2044 dst, bpf_alu_string[opcode >> 4]);
1991 dst, bpf_alu_string[opcode >> 4]);
1992 return -EACCES; 2045 return -EACCES;
1993 } 2046 }
1994 2047
2048 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
2049 return -EINVAL;
2050
1995 __update_reg_bounds(dst_reg); 2051 __update_reg_bounds(dst_reg);
1996 __reg_deduce_bounds(dst_reg); 2052 __reg_deduce_bounds(dst_reg);
1997 __reg_bound_offset(dst_reg); 2053 __reg_bound_offset(dst_reg);
1998 return 0; 2054 return 0;
1999} 2055}
2000 2056
2057/* WARNING: This function does calculations on 64-bit values, but the actual
2058 * execution may occur on 32-bit values. Therefore, things like bitshifts
2059 * need extra checks in the 32-bit case.
2060 */
2001static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, 2061static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2002 struct bpf_insn *insn, 2062 struct bpf_insn *insn,
2003 struct bpf_reg_state *dst_reg, 2063 struct bpf_reg_state *dst_reg,
@@ -2008,12 +2068,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2008 bool src_known, dst_known; 2068 bool src_known, dst_known;
2009 s64 smin_val, smax_val; 2069 s64 smin_val, smax_val;
2010 u64 umin_val, umax_val; 2070 u64 umin_val, umax_val;
2071 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
2011 2072
2012 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2013 /* 32-bit ALU ops are (32,32)->64 */
2014 coerce_reg_to_32(dst_reg);
2015 coerce_reg_to_32(&src_reg);
2016 }
2017 smin_val = src_reg.smin_value; 2073 smin_val = src_reg.smin_value;
2018 smax_val = src_reg.smax_value; 2074 smax_val = src_reg.smax_value;
2019 umin_val = src_reg.umin_value; 2075 umin_val = src_reg.umin_value;
@@ -2021,6 +2077,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2021 src_known = tnum_is_const(src_reg.var_off); 2077 src_known = tnum_is_const(src_reg.var_off);
2022 dst_known = tnum_is_const(dst_reg->var_off); 2078 dst_known = tnum_is_const(dst_reg->var_off);
2023 2079
2080 if (!src_known &&
2081 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
2082 __mark_reg_unknown(dst_reg);
2083 return 0;
2084 }
2085
2024 switch (opcode) { 2086 switch (opcode) {
2025 case BPF_ADD: 2087 case BPF_ADD:
2026 if (signed_add_overflows(dst_reg->smin_value, smin_val) || 2088 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
@@ -2149,9 +2211,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2149 __update_reg_bounds(dst_reg); 2211 __update_reg_bounds(dst_reg);
2150 break; 2212 break;
2151 case BPF_LSH: 2213 case BPF_LSH:
2152 if (umax_val > 63) { 2214 if (umax_val >= insn_bitness) {
2153 /* Shifts greater than 63 are undefined. This includes 2215 /* Shifts greater than 31 or 63 are undefined.
2154 * shifts by a negative number. 2216 * This includes shifts by a negative number.
2155 */ 2217 */
2156 mark_reg_unknown(env, regs, insn->dst_reg); 2218 mark_reg_unknown(env, regs, insn->dst_reg);
2157 break; 2219 break;
@@ -2177,27 +2239,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2177 __update_reg_bounds(dst_reg); 2239 __update_reg_bounds(dst_reg);
2178 break; 2240 break;
2179 case BPF_RSH: 2241 case BPF_RSH:
2180 if (umax_val > 63) { 2242 if (umax_val >= insn_bitness) {
2181 /* Shifts greater than 63 are undefined. This includes 2243 /* Shifts greater than 31 or 63 are undefined.
2182 * shifts by a negative number. 2244 * This includes shifts by a negative number.
2183 */ 2245 */
2184 mark_reg_unknown(env, regs, insn->dst_reg); 2246 mark_reg_unknown(env, regs, insn->dst_reg);
2185 break; 2247 break;
2186 } 2248 }
2187 /* BPF_RSH is an unsigned shift, so make the appropriate casts */ 2249 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
2188 if (dst_reg->smin_value < 0) { 2250 * be negative, then either:
2189 if (umin_val) { 2251 * 1) src_reg might be zero, so the sign bit of the result is
2190 /* Sign bit will be cleared */ 2252 * unknown, so we lose our signed bounds
2191 dst_reg->smin_value = 0; 2253 * 2) it's known negative, thus the unsigned bounds capture the
2192 } else { 2254 * signed bounds
2193 /* Lost sign bit information */ 2255 * 3) the signed bounds cross zero, so they tell us nothing
2194 dst_reg->smin_value = S64_MIN; 2256 * about the result
2195 dst_reg->smax_value = S64_MAX; 2257 * If the value in dst_reg is known nonnegative, then again the
2196 } 2258 * unsigned bounts capture the signed bounds.
2197 } else { 2259 * Thus, in all cases it suffices to blow away our signed bounds
2198 dst_reg->smin_value = 2260 * and rely on inferring new ones from the unsigned bounds and
2199 (u64)(dst_reg->smin_value) >> umax_val; 2261 * var_off of the result.
2200 } 2262 */
2263 dst_reg->smin_value = S64_MIN;
2264 dst_reg->smax_value = S64_MAX;
2201 if (src_known) 2265 if (src_known)
2202 dst_reg->var_off = tnum_rshift(dst_reg->var_off, 2266 dst_reg->var_off = tnum_rshift(dst_reg->var_off,
2203 umin_val); 2267 umin_val);
@@ -2213,6 +2277,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2213 break; 2277 break;
2214 } 2278 }
2215 2279
2280 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2281 /* 32-bit ALU ops are (32,32)->32 */
2282 coerce_reg_to_size(dst_reg, 4);
2283 coerce_reg_to_size(&src_reg, 4);
2284 }
2285
2216 __reg_deduce_bounds(dst_reg); 2286 __reg_deduce_bounds(dst_reg);
2217 __reg_bound_offset(dst_reg); 2287 __reg_bound_offset(dst_reg);
2218 return 0; 2288 return 0;
@@ -2227,7 +2297,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2227 struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg; 2297 struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
2228 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; 2298 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
2229 u8 opcode = BPF_OP(insn->code); 2299 u8 opcode = BPF_OP(insn->code);
2230 int rc;
2231 2300
2232 dst_reg = &regs[insn->dst_reg]; 2301 dst_reg = &regs[insn->dst_reg];
2233 src_reg = NULL; 2302 src_reg = NULL;
@@ -2238,43 +2307,29 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2238 if (src_reg->type != SCALAR_VALUE) { 2307 if (src_reg->type != SCALAR_VALUE) {
2239 if (dst_reg->type != SCALAR_VALUE) { 2308 if (dst_reg->type != SCALAR_VALUE) {
2240 /* Combining two pointers by any ALU op yields 2309 /* Combining two pointers by any ALU op yields
2241 * an arbitrary scalar. 2310 * an arbitrary scalar. Disallow all math except
2311 * pointer subtraction
2242 */ 2312 */
2243 if (!env->allow_ptr_leaks) { 2313 if (opcode == BPF_SUB){
2244 verbose(env, "R%d pointer %s pointer prohibited\n", 2314 mark_reg_unknown(env, regs, insn->dst_reg);
2245 insn->dst_reg, 2315 return 0;
2246 bpf_alu_string[opcode >> 4]);
2247 return -EACCES;
2248 } 2316 }
2249 mark_reg_unknown(env, regs, insn->dst_reg); 2317 verbose(env, "R%d pointer %s pointer prohibited\n",
2250 return 0; 2318 insn->dst_reg,
2319 bpf_alu_string[opcode >> 4]);
2320 return -EACCES;
2251 } else { 2321 } else {
2252 /* scalar += pointer 2322 /* scalar += pointer
2253 * This is legal, but we have to reverse our 2323 * This is legal, but we have to reverse our
2254 * src/dest handling in computing the range 2324 * src/dest handling in computing the range
2255 */ 2325 */
2256 rc = adjust_ptr_min_max_vals(env, insn, 2326 return adjust_ptr_min_max_vals(env, insn,
2257 src_reg, dst_reg); 2327 src_reg, dst_reg);
2258 if (rc == -EACCES && env->allow_ptr_leaks) {
2259 /* scalar += unknown scalar */
2260 __mark_reg_unknown(&off_reg);
2261 return adjust_scalar_min_max_vals(
2262 env, insn,
2263 dst_reg, off_reg);
2264 }
2265 return rc;
2266 } 2328 }
2267 } else if (ptr_reg) { 2329 } else if (ptr_reg) {
2268 /* pointer += scalar */ 2330 /* pointer += scalar */
2269 rc = adjust_ptr_min_max_vals(env, insn, 2331 return adjust_ptr_min_max_vals(env, insn,
2270 dst_reg, src_reg); 2332 dst_reg, src_reg);
2271 if (rc == -EACCES && env->allow_ptr_leaks) {
2272 /* unknown scalar += scalar */
2273 __mark_reg_unknown(dst_reg);
2274 return adjust_scalar_min_max_vals(
2275 env, insn, dst_reg, *src_reg);
2276 }
2277 return rc;
2278 } 2333 }
2279 } else { 2334 } else {
2280 /* Pretend the src is a reg with a known value, since we only 2335 /* Pretend the src is a reg with a known value, since we only
@@ -2283,17 +2338,9 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2283 off_reg.type = SCALAR_VALUE; 2338 off_reg.type = SCALAR_VALUE;
2284 __mark_reg_known(&off_reg, insn->imm); 2339 __mark_reg_known(&off_reg, insn->imm);
2285 src_reg = &off_reg; 2340 src_reg = &off_reg;
2286 if (ptr_reg) { /* pointer += K */ 2341 if (ptr_reg) /* pointer += K */
2287 rc = adjust_ptr_min_max_vals(env, insn, 2342 return adjust_ptr_min_max_vals(env, insn,
2288 ptr_reg, src_reg); 2343 ptr_reg, src_reg);
2289 if (rc == -EACCES && env->allow_ptr_leaks) {
2290 /* unknown scalar += K */
2291 __mark_reg_unknown(dst_reg);
2292 return adjust_scalar_min_max_vals(
2293 env, insn, dst_reg, off_reg);
2294 }
2295 return rc;
2296 }
2297 } 2344 }
2298 2345
2299 /* Got here implies adding two SCALAR_VALUEs */ 2346 /* Got here implies adding two SCALAR_VALUEs */
@@ -2390,17 +2437,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
2390 return -EACCES; 2437 return -EACCES;
2391 } 2438 }
2392 mark_reg_unknown(env, regs, insn->dst_reg); 2439 mark_reg_unknown(env, regs, insn->dst_reg);
2393 /* high 32 bits are known zero. */ 2440 coerce_reg_to_size(&regs[insn->dst_reg], 4);
2394 regs[insn->dst_reg].var_off = tnum_cast(
2395 regs[insn->dst_reg].var_off, 4);
2396 __update_reg_bounds(&regs[insn->dst_reg]);
2397 } 2441 }
2398 } else { 2442 } else {
2399 /* case: R = imm 2443 /* case: R = imm
2400 * remember the value we stored into this reg 2444 * remember the value we stored into this reg
2401 */ 2445 */
2402 regs[insn->dst_reg].type = SCALAR_VALUE; 2446 regs[insn->dst_reg].type = SCALAR_VALUE;
2403 __mark_reg_known(regs + insn->dst_reg, insn->imm); 2447 if (BPF_CLASS(insn->code) == BPF_ALU64) {
2448 __mark_reg_known(regs + insn->dst_reg,
2449 insn->imm);
2450 } else {
2451 __mark_reg_known(regs + insn->dst_reg,
2452 (u32)insn->imm);
2453 }
2404 } 2454 }
2405 2455
2406 } else if (opcode > BPF_END) { 2456 } else if (opcode > BPF_END) {
@@ -3431,15 +3481,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
3431 return range_within(rold, rcur) && 3481 return range_within(rold, rcur) &&
3432 tnum_in(rold->var_off, rcur->var_off); 3482 tnum_in(rold->var_off, rcur->var_off);
3433 } else { 3483 } else {
3434 /* if we knew anything about the old value, we're not 3484 /* We're trying to use a pointer in place of a scalar.
3435 * equal, because we can't know anything about the 3485 * Even if the scalar was unbounded, this could lead to
3436 * scalar value of the pointer in the new value. 3486 * pointer leaks because scalars are allowed to leak
3487 * while pointers are not. We could make this safe in
3488 * special cases if root is calling us, but it's
3489 * probably not worth the hassle.
3437 */ 3490 */
3438 return rold->umin_value == 0 && 3491 return false;
3439 rold->umax_value == U64_MAX &&
3440 rold->smin_value == S64_MIN &&
3441 rold->smax_value == S64_MAX &&
3442 tnum_is_unknown(rold->var_off);
3443 } 3492 }
3444 case PTR_TO_MAP_VALUE: 3493 case PTR_TO_MAP_VALUE:
3445 /* If the new min/max/var_off satisfy the old ones and 3494 /* If the new min/max/var_off satisfy the old ones and
diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c
index 5f780d8f6a9d..9caeda610249 100644
--- a/kernel/cgroup/debug.c
+++ b/kernel/cgroup/debug.c
@@ -50,7 +50,7 @@ static int current_css_set_read(struct seq_file *seq, void *v)
50 50
51 spin_lock_irq(&css_set_lock); 51 spin_lock_irq(&css_set_lock);
52 rcu_read_lock(); 52 rcu_read_lock();
53 cset = rcu_dereference(current->cgroups); 53 cset = task_css_set(current);
54 refcnt = refcount_read(&cset->refcount); 54 refcnt = refcount_read(&cset->refcount);
55 seq_printf(seq, "css_set %pK %d", cset, refcnt); 55 seq_printf(seq, "css_set %pK %d", cset, refcnt);
56 if (refcnt > cset->nr_tasks) 56 if (refcnt > cset->nr_tasks)
@@ -96,7 +96,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
96 96
97 spin_lock_irq(&css_set_lock); 97 spin_lock_irq(&css_set_lock);
98 rcu_read_lock(); 98 rcu_read_lock();
99 cset = rcu_dereference(current->cgroups); 99 cset = task_css_set(current);
100 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { 100 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
101 struct cgroup *c = link->cgrp; 101 struct cgroup *c = link->cgrp;
102 102
diff --git a/kernel/cgroup/stat.c b/kernel/cgroup/stat.c
index 133b465691d6..1e111dd455c4 100644
--- a/kernel/cgroup/stat.c
+++ b/kernel/cgroup/stat.c
@@ -296,8 +296,12 @@ int cgroup_stat_init(struct cgroup *cgrp)
296 } 296 }
297 297
298 /* ->updated_children list is self terminated */ 298 /* ->updated_children list is self terminated */
299 for_each_possible_cpu(cpu) 299 for_each_possible_cpu(cpu) {
300 cgroup_cpu_stat(cgrp, cpu)->updated_children = cgrp; 300 struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
301
302 cstat->updated_children = cgrp;
303 u64_stats_init(&cstat->sync);
304 }
301 305
302 prev_cputime_init(&cgrp->stat.prev_cputime); 306 prev_cputime_init(&cgrp->stat.prev_cputime);
303 307
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 41376c3ac93b..53f7dc65f9a3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -80,19 +80,19 @@ static struct lockdep_map cpuhp_state_down_map =
80 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map); 80 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
81 81
82 82
83static void inline cpuhp_lock_acquire(bool bringup) 83static inline void cpuhp_lock_acquire(bool bringup)
84{ 84{
85 lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); 85 lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
86} 86}
87 87
88static void inline cpuhp_lock_release(bool bringup) 88static inline void cpuhp_lock_release(bool bringup)
89{ 89{
90 lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); 90 lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
91} 91}
92#else 92#else
93 93
94static void inline cpuhp_lock_acquire(bool bringup) { } 94static inline void cpuhp_lock_acquire(bool bringup) { }
95static void inline cpuhp_lock_release(bool bringup) { } 95static inline void cpuhp_lock_release(bool bringup) { }
96 96
97#endif 97#endif
98 98
@@ -1277,9 +1277,9 @@ static struct cpuhp_step cpuhp_bp_states[] = {
1277 * before blk_mq_queue_reinit_notify() from notify_dead(), 1277 * before blk_mq_queue_reinit_notify() from notify_dead(),
1278 * otherwise a RCU stall occurs. 1278 * otherwise a RCU stall occurs.
1279 */ 1279 */
1280 [CPUHP_TIMERS_DEAD] = { 1280 [CPUHP_TIMERS_PREPARE] = {
1281 .name = "timers:dead", 1281 .name = "timers:dead",
1282 .startup.single = NULL, 1282 .startup.single = timers_prepare_cpu,
1283 .teardown.single = timers_dead_cpu, 1283 .teardown.single = timers_dead_cpu,
1284 }, 1284 },
1285 /* Kicks the plugged cpu into life */ 1285 /* Kicks the plugged cpu into life */
diff --git a/kernel/exit.c b/kernel/exit.c
index 6b4298a41167..df0c91d5606c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1755,3 +1755,11 @@ Efault:
1755 return -EFAULT; 1755 return -EFAULT;
1756} 1756}
1757#endif 1757#endif
1758
1759__weak void abort(void)
1760{
1761 BUG();
1762
1763 /* if that doesn't kill us, halt */
1764 panic("Oops failed to kill thread");
1765}
diff --git a/kernel/fork.c b/kernel/fork.c
index 432eadf6b58c..2295fc69717f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
721 goto out; 721 goto out;
722 } 722 }
723 /* a new mm has just been created */ 723 /* a new mm has just been created */
724 arch_dup_mmap(oldmm, mm); 724 retval = arch_dup_mmap(oldmm, mm);
725 retval = 0;
726out: 725out:
727 up_write(&mm->mmap_sem); 726 up_write(&mm->mmap_sem);
728 flush_tlb_mm(oldmm); 727 flush_tlb_mm(oldmm);
diff --git a/kernel/groups.c b/kernel/groups.c
index e357bc800111..daae2f2dc6d4 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -86,11 +86,12 @@ static int gid_cmp(const void *_a, const void *_b)
86 return gid_gt(a, b) - gid_lt(a, b); 86 return gid_gt(a, b) - gid_lt(a, b);
87} 87}
88 88
89static void groups_sort(struct group_info *group_info) 89void groups_sort(struct group_info *group_info)
90{ 90{
91 sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid), 91 sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid),
92 gid_cmp, NULL); 92 gid_cmp, NULL);
93} 93}
94EXPORT_SYMBOL(groups_sort);
94 95
95/* a simple bsearch */ 96/* a simple bsearch */
96int groups_search(const struct group_info *group_info, kgid_t grp) 97int groups_search(const struct group_info *group_info, kgid_t grp)
@@ -122,7 +123,6 @@ int groups_search(const struct group_info *group_info, kgid_t grp)
122void set_groups(struct cred *new, struct group_info *group_info) 123void set_groups(struct cred *new, struct group_info *group_info)
123{ 124{
124 put_group_info(new->group_info); 125 put_group_info(new->group_info);
125 groups_sort(group_info);
126 get_group_info(group_info); 126 get_group_info(group_info);
127 new->group_info = group_info; 127 new->group_info = group_info;
128} 128}
@@ -206,6 +206,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
206 return retval; 206 return retval;
207 } 207 }
208 208
209 groups_sort(group_info);
209 retval = set_current_groups(group_info); 210 retval = set_current_groups(group_info);
210 put_group_info(group_info); 211 put_group_info(group_info);
211 212
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h
index 17f05ef8f575..e4d3819a91cc 100644
--- a/kernel/irq/debug.h
+++ b/kernel/irq/debug.h
@@ -12,6 +12,11 @@
12 12
13static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) 13static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
14{ 14{
15 static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);
16
17 if (!__ratelimit(&ratelimit))
18 return;
19
15 printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n", 20 printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
16 irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled); 21 irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
17 printk("->handle_irq(): %p, ", desc->handle_irq); 22 printk("->handle_irq(): %p, ", desc->handle_irq);
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index 7f608ac39653..acfaaef8672a 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -113,6 +113,7 @@ static const struct irq_bit_descr irqdata_states[] = {
113 BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), 113 BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING),
114 BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), 114 BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
115 BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), 115 BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
116 BIT_MASK_DESCR(IRQD_CAN_RESERVE),
116 117
117 BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), 118 BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),
118 119
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index c26c5bb6b491..508c03dfef25 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -364,10 +364,11 @@ irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq)
364EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip); 364EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip);
365 365
366/* 366/*
367 * Separate lockdep class for interrupt chip which can nest irq_desc 367 * Separate lockdep classes for interrupt chip which can nest irq_desc
368 * lock. 368 * lock and request mutex.
369 */ 369 */
370static struct lock_class_key irq_nested_lock_class; 370static struct lock_class_key irq_nested_lock_class;
371static struct lock_class_key irq_nested_request_class;
371 372
372/* 373/*
373 * irq_map_generic_chip - Map a generic chip for an irq domain 374 * irq_map_generic_chip - Map a generic chip for an irq domain
@@ -409,7 +410,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
409 set_bit(idx, &gc->installed); 410 set_bit(idx, &gc->installed);
410 411
411 if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK) 412 if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK)
412 irq_set_lockdep_class(virq, &irq_nested_lock_class); 413 irq_set_lockdep_class(virq, &irq_nested_lock_class,
414 &irq_nested_request_class);
413 415
414 if (chip->irq_calc_mask) 416 if (chip->irq_calc_mask)
415 chip->irq_calc_mask(data); 417 chip->irq_calc_mask(data);
@@ -479,7 +481,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
479 continue; 481 continue;
480 482
481 if (flags & IRQ_GC_INIT_NESTED_LOCK) 483 if (flags & IRQ_GC_INIT_NESTED_LOCK)
482 irq_set_lockdep_class(i, &irq_nested_lock_class); 484 irq_set_lockdep_class(i, &irq_nested_lock_class,
485 &irq_nested_request_class);
483 486
484 if (!(flags & IRQ_GC_NO_MASK)) { 487 if (!(flags & IRQ_GC_NO_MASK)) {
485 struct irq_data *d = irq_get_irq_data(i); 488 struct irq_data *d = irq_get_irq_data(i);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 07d08ca701ec..ab19371eab9b 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -440,7 +440,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
440#endif /* !CONFIG_GENERIC_PENDING_IRQ */ 440#endif /* !CONFIG_GENERIC_PENDING_IRQ */
441 441
442#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) 442#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
443static inline int irq_domain_activate_irq(struct irq_data *data, bool early) 443static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve)
444{ 444{
445 irqd_set_activated(data); 445 irqd_set_activated(data);
446 return 0; 446 return 0;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 4f4f60015e8a..62068ad46930 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1693,7 +1693,7 @@ static void __irq_domain_deactivate_irq(struct irq_data *irq_data)
1693 } 1693 }
1694} 1694}
1695 1695
1696static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) 1696static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve)
1697{ 1697{
1698 int ret = 0; 1698 int ret = 0;
1699 1699
@@ -1702,9 +1702,9 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
1702 1702
1703 if (irqd->parent_data) 1703 if (irqd->parent_data)
1704 ret = __irq_domain_activate_irq(irqd->parent_data, 1704 ret = __irq_domain_activate_irq(irqd->parent_data,
1705 early); 1705 reserve);
1706 if (!ret && domain->ops->activate) { 1706 if (!ret && domain->ops->activate) {
1707 ret = domain->ops->activate(domain, irqd, early); 1707 ret = domain->ops->activate(domain, irqd, reserve);
1708 /* Rollback in case of error */ 1708 /* Rollback in case of error */
1709 if (ret && irqd->parent_data) 1709 if (ret && irqd->parent_data)
1710 __irq_domain_deactivate_irq(irqd->parent_data); 1710 __irq_domain_deactivate_irq(irqd->parent_data);
@@ -1716,17 +1716,18 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
1716/** 1716/**
1717 * irq_domain_activate_irq - Call domain_ops->activate recursively to activate 1717 * irq_domain_activate_irq - Call domain_ops->activate recursively to activate
1718 * interrupt 1718 * interrupt
1719 * @irq_data: outermost irq_data associated with interrupt 1719 * @irq_data: Outermost irq_data associated with interrupt
1720 * @reserve: If set only reserve an interrupt vector instead of assigning one
1720 * 1721 *
1721 * This is the second step to call domain_ops->activate to program interrupt 1722 * This is the second step to call domain_ops->activate to program interrupt
1722 * controllers, so the interrupt could actually get delivered. 1723 * controllers, so the interrupt could actually get delivered.
1723 */ 1724 */
1724int irq_domain_activate_irq(struct irq_data *irq_data, bool early) 1725int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve)
1725{ 1726{
1726 int ret = 0; 1727 int ret = 0;
1727 1728
1728 if (!irqd_is_activated(irq_data)) 1729 if (!irqd_is_activated(irq_data))
1729 ret = __irq_domain_activate_irq(irq_data, early); 1730 ret = __irq_domain_activate_irq(irq_data, reserve);
1730 if (!ret) 1731 if (!ret)
1731 irqd_set_activated(irq_data); 1732 irqd_set_activated(irq_data);
1732 return ret; 1733 return ret;
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index edb987b2c58d..2f3c4f5382cc 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -339,6 +339,40 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
339 return ret; 339 return ret;
340} 340}
341 341
342/*
343 * Carefully check whether the device can use reservation mode. If
344 * reservation mode is enabled then the early activation will assign a
345 * dummy vector to the device. If the PCI/MSI device does not support
346 * masking of the entry then this can result in spurious interrupts when
347 * the device driver is not absolutely careful. But even then a malfunction
348 * of the hardware could result in a spurious interrupt on the dummy vector
349 * and render the device unusable. If the entry can be masked then the core
350 * logic will prevent the spurious interrupt and reservation mode can be
351 * used. For now reservation mode is restricted to PCI/MSI.
352 */
353static bool msi_check_reservation_mode(struct irq_domain *domain,
354 struct msi_domain_info *info,
355 struct device *dev)
356{
357 struct msi_desc *desc;
358
359 if (domain->bus_token != DOMAIN_BUS_PCI_MSI)
360 return false;
361
362 if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
363 return false;
364
365 if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
366 return false;
367
368 /*
369 * Checking the first MSI descriptor is sufficient. MSIX supports
370 * masking and MSI does so when the maskbit is set.
371 */
372 desc = first_msi_entry(dev);
373 return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
374}
375
342/** 376/**
343 * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain 377 * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
344 * @domain: The domain to allocate from 378 * @domain: The domain to allocate from
@@ -353,9 +387,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
353{ 387{
354 struct msi_domain_info *info = domain->host_data; 388 struct msi_domain_info *info = domain->host_data;
355 struct msi_domain_ops *ops = info->ops; 389 struct msi_domain_ops *ops = info->ops;
356 msi_alloc_info_t arg; 390 struct irq_data *irq_data;
357 struct msi_desc *desc; 391 struct msi_desc *desc;
392 msi_alloc_info_t arg;
358 int i, ret, virq; 393 int i, ret, virq;
394 bool can_reserve;
359 395
360 ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg); 396 ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
361 if (ret) 397 if (ret)
@@ -385,6 +421,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
385 if (ops->msi_finish) 421 if (ops->msi_finish)
386 ops->msi_finish(&arg, 0); 422 ops->msi_finish(&arg, 0);
387 423
424 can_reserve = msi_check_reservation_mode(domain, info, dev);
425
388 for_each_msi_entry(desc, dev) { 426 for_each_msi_entry(desc, dev) {
389 virq = desc->irq; 427 virq = desc->irq;
390 if (desc->nvec_used == 1) 428 if (desc->nvec_used == 1)
@@ -397,15 +435,25 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
397 * the MSI entries before the PCI layer enables MSI in the 435 * the MSI entries before the PCI layer enables MSI in the
398 * card. Otherwise the card latches a random msi message. 436 * card. Otherwise the card latches a random msi message.
399 */ 437 */
400 if (info->flags & MSI_FLAG_ACTIVATE_EARLY) { 438 if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
401 struct irq_data *irq_data; 439 continue;
402 440
441 irq_data = irq_domain_get_irq_data(domain, desc->irq);
442 if (!can_reserve)
443 irqd_clr_can_reserve(irq_data);
444 ret = irq_domain_activate_irq(irq_data, can_reserve);
445 if (ret)
446 goto cleanup;
447 }
448
449 /*
450 * If these interrupts use reservation mode, clear the activated bit
451 * so request_irq() will assign the final vector.
452 */
453 if (can_reserve) {
454 for_each_msi_entry(desc, dev) {
403 irq_data = irq_domain_get_irq_data(domain, desc->irq); 455 irq_data = irq_domain_get_irq_data(domain, desc->irq);
404 ret = irq_domain_activate_irq(irq_data, true); 456 irqd_clr_activated(irq_data);
405 if (ret)
406 goto cleanup;
407 if (info->flags & MSI_FLAG_MUST_REACTIVATE)
408 irqd_clr_activated(irq_data);
409 } 457 }
410 } 458 }
411 return 0; 459 return 0;
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 15f33faf4013..7594c033d98a 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -157,7 +157,7 @@ void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2)
157} 157}
158EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2); 158EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2);
159 159
160void notrace __sanitizer_cov_trace_cmp4(u16 arg1, u16 arg2) 160void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2)
161{ 161{
162 write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_); 162 write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_);
163} 163}
@@ -183,7 +183,7 @@ void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2)
183} 183}
184EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2); 184EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2);
185 185
186void notrace __sanitizer_cov_trace_const_cmp4(u16 arg1, u16 arg2) 186void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2)
187{ 187{
188 write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2, 188 write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2,
189 _RET_IP_); 189 _RET_IP_);
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 670d8d7d8087..5fa1324a4f29 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -57,10 +57,6 @@
57#define CREATE_TRACE_POINTS 57#define CREATE_TRACE_POINTS
58#include <trace/events/lock.h> 58#include <trace/events/lock.h>
59 59
60#ifdef CONFIG_LOCKDEP_CROSSRELEASE
61#include <linux/slab.h>
62#endif
63
64#ifdef CONFIG_PROVE_LOCKING 60#ifdef CONFIG_PROVE_LOCKING
65int prove_locking = 1; 61int prove_locking = 1;
66module_param(prove_locking, int, 0644); 62module_param(prove_locking, int, 0644);
@@ -75,19 +71,6 @@ module_param(lock_stat, int, 0644);
75#define lock_stat 0 71#define lock_stat 0
76#endif 72#endif
77 73
78#ifdef CONFIG_BOOTPARAM_LOCKDEP_CROSSRELEASE_FULLSTACK
79static int crossrelease_fullstack = 1;
80#else
81static int crossrelease_fullstack;
82#endif
83static int __init allow_crossrelease_fullstack(char *str)
84{
85 crossrelease_fullstack = 1;
86 return 0;
87}
88
89early_param("crossrelease_fullstack", allow_crossrelease_fullstack);
90
91/* 74/*
92 * lockdep_lock: protects the lockdep graph, the hashes and the 75 * lockdep_lock: protects the lockdep graph, the hashes and the
93 * class/list/hash allocators. 76 * class/list/hash allocators.
@@ -740,18 +723,6 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
740 return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); 723 return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL);
741} 724}
742 725
743#ifdef CONFIG_LOCKDEP_CROSSRELEASE
744static void cross_init(struct lockdep_map *lock, int cross);
745static int cross_lock(struct lockdep_map *lock);
746static int lock_acquire_crosslock(struct held_lock *hlock);
747static int lock_release_crosslock(struct lockdep_map *lock);
748#else
749static inline void cross_init(struct lockdep_map *lock, int cross) {}
750static inline int cross_lock(struct lockdep_map *lock) { return 0; }
751static inline int lock_acquire_crosslock(struct held_lock *hlock) { return 2; }
752static inline int lock_release_crosslock(struct lockdep_map *lock) { return 2; }
753#endif
754
755/* 726/*
756 * Register a lock's class in the hash-table, if the class is not present 727 * Register a lock's class in the hash-table, if the class is not present
757 * yet. Otherwise we look it up. We cache the result in the lock object 728 * yet. Otherwise we look it up. We cache the result in the lock object
@@ -1151,41 +1122,22 @@ print_circular_lock_scenario(struct held_lock *src,
1151 printk(KERN_CONT "\n\n"); 1122 printk(KERN_CONT "\n\n");
1152 } 1123 }
1153 1124
1154 if (cross_lock(tgt->instance)) { 1125 printk(" Possible unsafe locking scenario:\n\n");
1155 printk(" Possible unsafe locking scenario by crosslock:\n\n"); 1126 printk(" CPU0 CPU1\n");
1156 printk(" CPU0 CPU1\n"); 1127 printk(" ---- ----\n");
1157 printk(" ---- ----\n"); 1128 printk(" lock(");
1158 printk(" lock("); 1129 __print_lock_name(target);
1159 __print_lock_name(parent); 1130 printk(KERN_CONT ");\n");
1160 printk(KERN_CONT ");\n"); 1131 printk(" lock(");
1161 printk(" lock("); 1132 __print_lock_name(parent);
1162 __print_lock_name(target); 1133 printk(KERN_CONT ");\n");
1163 printk(KERN_CONT ");\n"); 1134 printk(" lock(");
1164 printk(" lock("); 1135 __print_lock_name(target);
1165 __print_lock_name(source); 1136 printk(KERN_CONT ");\n");
1166 printk(KERN_CONT ");\n"); 1137 printk(" lock(");
1167 printk(" unlock("); 1138 __print_lock_name(source);
1168 __print_lock_name(target); 1139 printk(KERN_CONT ");\n");
1169 printk(KERN_CONT ");\n"); 1140 printk("\n *** DEADLOCK ***\n\n");
1170 printk("\n *** DEADLOCK ***\n\n");
1171 } else {
1172 printk(" Possible unsafe locking scenario:\n\n");
1173 printk(" CPU0 CPU1\n");
1174 printk(" ---- ----\n");
1175 printk(" lock(");
1176 __print_lock_name(target);
1177 printk(KERN_CONT ");\n");
1178 printk(" lock(");
1179 __print_lock_name(parent);
1180 printk(KERN_CONT ");\n");
1181 printk(" lock(");
1182 __print_lock_name(target);
1183 printk(KERN_CONT ");\n");
1184 printk(" lock(");
1185 __print_lock_name(source);
1186 printk(KERN_CONT ");\n");
1187 printk("\n *** DEADLOCK ***\n\n");
1188 }
1189} 1141}
1190 1142
1191/* 1143/*
@@ -1211,10 +1163,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
1211 curr->comm, task_pid_nr(curr)); 1163 curr->comm, task_pid_nr(curr));
1212 print_lock(check_src); 1164 print_lock(check_src);
1213 1165
1214 if (cross_lock(check_tgt->instance)) 1166 pr_warn("\nbut task is already holding lock:\n");
1215 pr_warn("\nbut now in release context of a crosslock acquired at the following:\n");
1216 else
1217 pr_warn("\nbut task is already holding lock:\n");
1218 1167
1219 print_lock(check_tgt); 1168 print_lock(check_tgt);
1220 pr_warn("\nwhich lock already depends on the new lock.\n\n"); 1169 pr_warn("\nwhich lock already depends on the new lock.\n\n");
@@ -1244,9 +1193,7 @@ static noinline int print_circular_bug(struct lock_list *this,
1244 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 1193 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1245 return 0; 1194 return 0;
1246 1195
1247 if (cross_lock(check_tgt->instance)) 1196 if (!save_trace(&this->trace))
1248 this->trace = *trace;
1249 else if (!save_trace(&this->trace))
1250 return 0; 1197 return 0;
1251 1198
1252 depth = get_lock_depth(target); 1199 depth = get_lock_depth(target);
@@ -1850,9 +1797,6 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
1850 if (nest) 1797 if (nest)
1851 return 2; 1798 return 2;
1852 1799
1853 if (cross_lock(prev->instance))
1854 continue;
1855
1856 return print_deadlock_bug(curr, prev, next); 1800 return print_deadlock_bug(curr, prev, next);
1857 } 1801 }
1858 return 1; 1802 return 1;
@@ -2018,31 +1962,26 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
2018 for (;;) { 1962 for (;;) {
2019 int distance = curr->lockdep_depth - depth + 1; 1963 int distance = curr->lockdep_depth - depth + 1;
2020 hlock = curr->held_locks + depth - 1; 1964 hlock = curr->held_locks + depth - 1;
1965
2021 /* 1966 /*
2022 * Only non-crosslock entries get new dependencies added. 1967 * Only non-recursive-read entries get new dependencies
2023 * Crosslock entries will be added by commit later: 1968 * added:
2024 */ 1969 */
2025 if (!cross_lock(hlock->instance)) { 1970 if (hlock->read != 2 && hlock->check) {
1971 int ret = check_prev_add(curr, hlock, next, distance, &trace, save_trace);
1972 if (!ret)
1973 return 0;
1974
2026 /* 1975 /*
2027 * Only non-recursive-read entries get new dependencies 1976 * Stop after the first non-trylock entry,
2028 * added: 1977 * as non-trylock entries have added their
1978 * own direct dependencies already, so this
1979 * lock is connected to them indirectly:
2029 */ 1980 */
2030 if (hlock->read != 2 && hlock->check) { 1981 if (!hlock->trylock)
2031 int ret = check_prev_add(curr, hlock, next, 1982 break;
2032 distance, &trace, save_trace);
2033 if (!ret)
2034 return 0;
2035
2036 /*
2037 * Stop after the first non-trylock entry,
2038 * as non-trylock entries have added their
2039 * own direct dependencies already, so this
2040 * lock is connected to them indirectly:
2041 */
2042 if (!hlock->trylock)
2043 break;
2044 }
2045 } 1983 }
1984
2046 depth--; 1985 depth--;
2047 /* 1986 /*
2048 * End of lock-stack? 1987 * End of lock-stack?
@@ -3292,21 +3231,10 @@ static void __lockdep_init_map(struct lockdep_map *lock, const char *name,
3292void lockdep_init_map(struct lockdep_map *lock, const char *name, 3231void lockdep_init_map(struct lockdep_map *lock, const char *name,
3293 struct lock_class_key *key, int subclass) 3232 struct lock_class_key *key, int subclass)
3294{ 3233{
3295 cross_init(lock, 0);
3296 __lockdep_init_map(lock, name, key, subclass); 3234 __lockdep_init_map(lock, name, key, subclass);
3297} 3235}
3298EXPORT_SYMBOL_GPL(lockdep_init_map); 3236EXPORT_SYMBOL_GPL(lockdep_init_map);
3299 3237
3300#ifdef CONFIG_LOCKDEP_CROSSRELEASE
3301void lockdep_init_map_crosslock(struct lockdep_map *lock, const char *name,
3302 struct lock_class_key *key, int subclass)
3303{
3304 cross_init(lock, 1);
3305 __lockdep_init_map(lock, name, key, subclass);
3306}
3307EXPORT_SYMBOL_GPL(lockdep_init_map_crosslock);
3308#endif
3309
3310struct lock_class_key __lockdep_no_validate__; 3238struct lock_class_key __lockdep_no_validate__;
3311EXPORT_SYMBOL_GPL(__lockdep_no_validate__); 3239EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
3312 3240
@@ -3362,7 +3290,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3362 int chain_head = 0; 3290 int chain_head = 0;
3363 int class_idx; 3291 int class_idx;
3364 u64 chain_key; 3292 u64 chain_key;
3365 int ret;
3366 3293
3367 if (unlikely(!debug_locks)) 3294 if (unlikely(!debug_locks))
3368 return 0; 3295 return 0;
@@ -3411,8 +3338,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3411 3338
3412 class_idx = class - lock_classes + 1; 3339 class_idx = class - lock_classes + 1;
3413 3340
3414 /* TODO: nest_lock is not implemented for crosslock yet. */ 3341 if (depth) {
3415 if (depth && !cross_lock(lock)) {
3416 hlock = curr->held_locks + depth - 1; 3342 hlock = curr->held_locks + depth - 1;
3417 if (hlock->class_idx == class_idx && nest_lock) { 3343 if (hlock->class_idx == class_idx && nest_lock) {
3418 if (hlock->references) { 3344 if (hlock->references) {
@@ -3500,14 +3426,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3500 if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) 3426 if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
3501 return 0; 3427 return 0;
3502 3428
3503 ret = lock_acquire_crosslock(hlock);
3504 /*
3505 * 2 means normal acquire operations are needed. Otherwise, it's
3506 * ok just to return with '0:fail, 1:success'.
3507 */
3508 if (ret != 2)
3509 return ret;
3510
3511 curr->curr_chain_key = chain_key; 3429 curr->curr_chain_key = chain_key;
3512 curr->lockdep_depth++; 3430 curr->lockdep_depth++;
3513 check_chain_key(curr); 3431 check_chain_key(curr);
@@ -3745,19 +3663,11 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
3745 struct task_struct *curr = current; 3663 struct task_struct *curr = current;
3746 struct held_lock *hlock; 3664 struct held_lock *hlock;
3747 unsigned int depth; 3665 unsigned int depth;
3748 int ret, i; 3666 int i;
3749 3667
3750 if (unlikely(!debug_locks)) 3668 if (unlikely(!debug_locks))
3751 return 0; 3669 return 0;
3752 3670
3753 ret = lock_release_crosslock(lock);
3754 /*
3755 * 2 means normal release operations are needed. Otherwise, it's
3756 * ok just to return with '0:fail, 1:success'.
3757 */
3758 if (ret != 2)
3759 return ret;
3760
3761 depth = curr->lockdep_depth; 3671 depth = curr->lockdep_depth;
3762 /* 3672 /*
3763 * So we're all set to release this lock.. wait what lock? We don't 3673 * So we're all set to release this lock.. wait what lock? We don't
@@ -4675,495 +4585,3 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
4675 dump_stack(); 4585 dump_stack();
4676} 4586}
4677EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); 4587EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
4678
4679#ifdef CONFIG_LOCKDEP_CROSSRELEASE
4680
4681/*
4682 * Crossrelease works by recording a lock history for each thread and
4683 * connecting those historic locks that were taken after the
4684 * wait_for_completion() in the complete() context.
4685 *
4686 * Task-A Task-B
4687 *
4688 * mutex_lock(&A);
4689 * mutex_unlock(&A);
4690 *
4691 * wait_for_completion(&C);
4692 * lock_acquire_crosslock();
4693 * atomic_inc_return(&cross_gen_id);
4694 * |
4695 * | mutex_lock(&B);
4696 * | mutex_unlock(&B);
4697 * |
4698 * | complete(&C);
4699 * `-- lock_commit_crosslock();
4700 *
4701 * Which will then add a dependency between B and C.
4702 */
4703
4704#define xhlock(i) (current->xhlocks[(i) % MAX_XHLOCKS_NR])
4705
4706/*
4707 * Whenever a crosslock is held, cross_gen_id will be increased.
4708 */
4709static atomic_t cross_gen_id; /* Can be wrapped */
4710
4711/*
4712 * Make an entry of the ring buffer invalid.
4713 */
4714static inline void invalidate_xhlock(struct hist_lock *xhlock)
4715{
4716 /*
4717 * Normally, xhlock->hlock.instance must be !NULL.
4718 */
4719 xhlock->hlock.instance = NULL;
4720}
4721
4722/*
4723 * Lock history stacks; we have 2 nested lock history stacks:
4724 *
4725 * HARD(IRQ)
4726 * SOFT(IRQ)
4727 *
4728 * The thing is that once we complete a HARD/SOFT IRQ the future task locks
4729 * should not depend on any of the locks observed while running the IRQ. So
4730 * what we do is rewind the history buffer and erase all our knowledge of that
4731 * temporal event.
4732 */
4733
4734void crossrelease_hist_start(enum xhlock_context_t c)
4735{
4736 struct task_struct *cur = current;
4737
4738 if (!cur->xhlocks)
4739 return;
4740
4741 cur->xhlock_idx_hist[c] = cur->xhlock_idx;
4742 cur->hist_id_save[c] = cur->hist_id;
4743}
4744
4745void crossrelease_hist_end(enum xhlock_context_t c)
4746{
4747 struct task_struct *cur = current;
4748
4749 if (cur->xhlocks) {
4750 unsigned int idx = cur->xhlock_idx_hist[c];
4751 struct hist_lock *h = &xhlock(idx);
4752
4753 cur->xhlock_idx = idx;
4754
4755 /* Check if the ring was overwritten. */
4756 if (h->hist_id != cur->hist_id_save[c])
4757 invalidate_xhlock(h);
4758 }
4759}
4760
4761/*
4762 * lockdep_invariant_state() is used to annotate independence inside a task, to
4763 * make one task look like multiple independent 'tasks'.
4764 *
4765 * Take for instance workqueues; each work is independent of the last. The
4766 * completion of a future work does not depend on the completion of a past work
4767 * (in general). Therefore we must not carry that (lock) dependency across
4768 * works.
4769 *
4770 * This is true for many things; pretty much all kthreads fall into this
4771 * pattern, where they have an invariant state and future completions do not
4772 * depend on past completions. Its just that since they all have the 'same'
4773 * form -- the kthread does the same over and over -- it doesn't typically
4774 * matter.
4775 *
4776 * The same is true for system-calls, once a system call is completed (we've
4777 * returned to userspace) the next system call does not depend on the lock
4778 * history of the previous system call.
4779 *
4780 * They key property for independence, this invariant state, is that it must be
4781 * a point where we hold no locks and have no history. Because if we were to
4782 * hold locks, the restore at _end() would not necessarily recover it's history
4783 * entry. Similarly, independence per-definition means it does not depend on
4784 * prior state.
4785 */
4786void lockdep_invariant_state(bool force)
4787{
4788 /*
4789 * We call this at an invariant point, no current state, no history.
4790 * Verify the former, enforce the latter.
4791 */
4792 WARN_ON_ONCE(!force && current->lockdep_depth);
4793 if (current->xhlocks)
4794 invalidate_xhlock(&xhlock(current->xhlock_idx));
4795}
4796
4797static int cross_lock(struct lockdep_map *lock)
4798{
4799 return lock ? lock->cross : 0;
4800}
4801
4802/*
4803 * This is needed to decide the relationship between wrapable variables.
4804 */
4805static inline int before(unsigned int a, unsigned int b)
4806{
4807 return (int)(a - b) < 0;
4808}
4809
4810static inline struct lock_class *xhlock_class(struct hist_lock *xhlock)
4811{
4812 return hlock_class(&xhlock->hlock);
4813}
4814
4815static inline struct lock_class *xlock_class(struct cross_lock *xlock)
4816{
4817 return hlock_class(&xlock->hlock);
4818}
4819
4820/*
4821 * Should we check a dependency with previous one?
4822 */
4823static inline int depend_before(struct held_lock *hlock)
4824{
4825 return hlock->read != 2 && hlock->check && !hlock->trylock;
4826}
4827
4828/*
4829 * Should we check a dependency with next one?
4830 */
4831static inline int depend_after(struct held_lock *hlock)
4832{
4833 return hlock->read != 2 && hlock->check;
4834}
4835
4836/*
4837 * Check if the xhlock is valid, which would be false if,
4838 *
4839 * 1. Has not used after initializaion yet.
4840 * 2. Got invalidated.
4841 *
4842 * Remind hist_lock is implemented as a ring buffer.
4843 */
4844static inline int xhlock_valid(struct hist_lock *xhlock)
4845{
4846 /*
4847 * xhlock->hlock.instance must be !NULL.
4848 */
4849 return !!xhlock->hlock.instance;
4850}
4851
4852/*
4853 * Record a hist_lock entry.
4854 *
4855 * Irq disable is only required.
4856 */
4857static void add_xhlock(struct held_lock *hlock)
4858{
4859 unsigned int idx = ++current->xhlock_idx;
4860 struct hist_lock *xhlock = &xhlock(idx);
4861
4862#ifdef CONFIG_DEBUG_LOCKDEP
4863 /*
4864 * This can be done locklessly because they are all task-local
4865 * state, we must however ensure IRQs are disabled.
4866 */
4867 WARN_ON_ONCE(!irqs_disabled());
4868#endif
4869
4870 /* Initialize hist_lock's members */
4871 xhlock->hlock = *hlock;
4872 xhlock->hist_id = ++current->hist_id;
4873
4874 xhlock->trace.nr_entries = 0;
4875 xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES;
4876 xhlock->trace.entries = xhlock->trace_entries;
4877
4878 if (crossrelease_fullstack) {
4879 xhlock->trace.skip = 3;
4880 save_stack_trace(&xhlock->trace);
4881 } else {
4882 xhlock->trace.nr_entries = 1;
4883 xhlock->trace.entries[0] = hlock->acquire_ip;
4884 }
4885}
4886
4887static inline int same_context_xhlock(struct hist_lock *xhlock)
4888{
4889 return xhlock->hlock.irq_context == task_irq_context(current);
4890}
4891
4892/*
4893 * This should be lockless as far as possible because this would be
4894 * called very frequently.
4895 */
4896static void check_add_xhlock(struct held_lock *hlock)
4897{
4898 /*
4899 * Record a hist_lock, only in case that acquisitions ahead
4900 * could depend on the held_lock. For example, if the held_lock
4901 * is trylock then acquisitions ahead never depends on that.
4902 * In that case, we don't need to record it. Just return.
4903 */
4904 if (!current->xhlocks || !depend_before(hlock))
4905 return;
4906
4907 add_xhlock(hlock);
4908}
4909
4910/*
4911 * For crosslock.
4912 */
4913static int add_xlock(struct held_lock *hlock)
4914{
4915 struct cross_lock *xlock;
4916 unsigned int gen_id;
4917
4918 if (!graph_lock())
4919 return 0;
4920
4921 xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock;
4922
4923 /*
4924 * When acquisitions for a crosslock are overlapped, we use
4925 * nr_acquire to perform commit for them, based on cross_gen_id
4926 * of the first acquisition, which allows to add additional
4927 * dependencies.
4928 *
4929 * Moreover, when no acquisition of a crosslock is in progress,
4930 * we should not perform commit because the lock might not exist
4931 * any more, which might cause incorrect memory access. So we
4932 * have to track the number of acquisitions of a crosslock.
4933 *
4934 * depend_after() is necessary to initialize only the first
4935 * valid xlock so that the xlock can be used on its commit.
4936 */
4937 if (xlock->nr_acquire++ && depend_after(&xlock->hlock))
4938 goto unlock;
4939
4940 gen_id = (unsigned int)atomic_inc_return(&cross_gen_id);
4941 xlock->hlock = *hlock;
4942 xlock->hlock.gen_id = gen_id;
4943unlock:
4944 graph_unlock();
4945 return 1;
4946}
4947
4948/*
4949 * Called for both normal and crosslock acquires. Normal locks will be
4950 * pushed on the hist_lock queue. Cross locks will record state and
4951 * stop regular lock_acquire() to avoid being placed on the held_lock
4952 * stack.
4953 *
4954 * Return: 0 - failure;
4955 * 1 - crosslock, done;
4956 * 2 - normal lock, continue to held_lock[] ops.
4957 */
4958static int lock_acquire_crosslock(struct held_lock *hlock)
4959{
4960 /*
4961 * CONTEXT 1 CONTEXT 2
4962 * --------- ---------
4963 * lock A (cross)
4964 * X = atomic_inc_return(&cross_gen_id)
4965 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4966 * Y = atomic_read_acquire(&cross_gen_id)
4967 * lock B
4968 *
4969 * atomic_read_acquire() is for ordering between A and B,
4970 * IOW, A happens before B, when CONTEXT 2 see Y >= X.
4971 *
4972 * Pairs with atomic_inc_return() in add_xlock().
4973 */
4974 hlock->gen_id = (unsigned int)atomic_read_acquire(&cross_gen_id);
4975
4976 if (cross_lock(hlock->instance))
4977 return add_xlock(hlock);
4978
4979 check_add_xhlock(hlock);
4980 return 2;
4981}
4982
4983static int copy_trace(struct stack_trace *trace)
4984{
4985 unsigned long *buf = stack_trace + nr_stack_trace_entries;
4986 unsigned int max_nr = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
4987 unsigned int nr = min(max_nr, trace->nr_entries);
4988
4989 trace->nr_entries = nr;
4990 memcpy(buf, trace->entries, nr * sizeof(trace->entries[0]));
4991 trace->entries = buf;
4992 nr_stack_trace_entries += nr;
4993
4994 if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) {
4995 if (!debug_locks_off_graph_unlock())
4996 return 0;
4997
4998 print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
4999 dump_stack();
5000
5001 return 0;
5002 }
5003
5004 return 1;
5005}
5006
5007static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock)
5008{
5009 unsigned int xid, pid;
5010 u64 chain_key;
5011
5012 xid = xlock_class(xlock) - lock_classes;
5013 chain_key = iterate_chain_key((u64)0, xid);
5014 pid = xhlock_class(xhlock) - lock_classes;
5015 chain_key = iterate_chain_key(chain_key, pid);
5016
5017 if (lookup_chain_cache(chain_key))
5018 return 1;
5019
5020 if (!add_chain_cache_classes(xid, pid, xhlock->hlock.irq_context,
5021 chain_key))
5022 return 0;
5023
5024 if (!check_prev_add(current, &xlock->hlock, &xhlock->hlock, 1,
5025 &xhlock->trace, copy_trace))
5026 return 0;
5027
5028 return 1;
5029}
5030
5031static void commit_xhlocks(struct cross_lock *xlock)
5032{
5033 unsigned int cur = current->xhlock_idx;
5034 unsigned int prev_hist_id = xhlock(cur).hist_id;
5035 unsigned int i;
5036
5037 if (!graph_lock())
5038 return;
5039
5040 if (xlock->nr_acquire) {
5041 for (i = 0; i < MAX_XHLOCKS_NR; i++) {
5042 struct hist_lock *xhlock = &xhlock(cur - i);
5043
5044 if (!xhlock_valid(xhlock))
5045 break;
5046
5047 if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id))
5048 break;
5049
5050 if (!same_context_xhlock(xhlock))
5051 break;
5052
5053 /*
5054 * Filter out the cases where the ring buffer was
5055 * overwritten and the current entry has a bigger
5056 * hist_id than the previous one, which is impossible
5057 * otherwise:
5058 */
5059 if (unlikely(before(prev_hist_id, xhlock->hist_id)))
5060 break;
5061
5062 prev_hist_id = xhlock->hist_id;
5063
5064 /*
5065 * commit_xhlock() returns 0 with graph_lock already
5066 * released if fail.
5067 */
5068 if (!commit_xhlock(xlock, xhlock))
5069 return;
5070 }
5071 }
5072
5073 graph_unlock();
5074}
5075
5076void lock_commit_crosslock(struct lockdep_map *lock)
5077{
5078 struct cross_lock *xlock;
5079 unsigned long flags;
5080
5081 if (unlikely(!debug_locks || current->lockdep_recursion))
5082 return;
5083
5084 if (!current->xhlocks)
5085 return;
5086
5087 /*
5088 * Do commit hist_locks with the cross_lock, only in case that
5089 * the cross_lock could depend on acquisitions after that.
5090 *
5091 * For example, if the cross_lock does not have the 'check' flag
5092 * then we don't need to check dependencies and commit for that.
5093 * Just skip it. In that case, of course, the cross_lock does
5094 * not depend on acquisitions ahead, either.
5095 *
5096 * WARNING: Don't do that in add_xlock() in advance. When an
5097 * acquisition context is different from the commit context,
5098 * invalid(skipped) cross_lock might be accessed.
5099 */
5100 if (!depend_after(&((struct lockdep_map_cross *)lock)->xlock.hlock))
5101 return;
5102
5103 raw_local_irq_save(flags);
5104 check_flags(flags);
5105 current->lockdep_recursion = 1;
5106 xlock = &((struct lockdep_map_cross *)lock)->xlock;
5107 commit_xhlocks(xlock);
5108 current->lockdep_recursion = 0;
5109 raw_local_irq_restore(flags);
5110}
5111EXPORT_SYMBOL_GPL(lock_commit_crosslock);
5112
5113/*
5114 * Return: 0 - failure;
5115 * 1 - crosslock, done;
5116 * 2 - normal lock, continue to held_lock[] ops.
5117 */
5118static int lock_release_crosslock(struct lockdep_map *lock)
5119{
5120 if (cross_lock(lock)) {
5121 if (!graph_lock())
5122 return 0;
5123 ((struct lockdep_map_cross *)lock)->xlock.nr_acquire--;
5124 graph_unlock();
5125 return 1;
5126 }
5127 return 2;
5128}
5129
5130static void cross_init(struct lockdep_map *lock, int cross)
5131{
5132 if (cross)
5133 ((struct lockdep_map_cross *)lock)->xlock.nr_acquire = 0;
5134
5135 lock->cross = cross;
5136
5137 /*
5138 * Crossrelease assumes that the ring buffer size of xhlocks
5139 * is aligned with power of 2. So force it on build.
5140 */
5141 BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1));
5142}
5143
5144void lockdep_init_task(struct task_struct *task)
5145{
5146 int i;
5147
5148 task->xhlock_idx = UINT_MAX;
5149 task->hist_id = 0;
5150
5151 for (i = 0; i < XHLOCK_CTX_NR; i++) {
5152 task->xhlock_idx_hist[i] = UINT_MAX;
5153 task->hist_id_save[i] = 0;
5154 }
5155
5156 task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR,
5157 GFP_KERNEL);
5158}
5159
5160void lockdep_free_task(struct task_struct *task)
5161{
5162 if (task->xhlocks) {
5163 void *tmp = task->xhlocks;
5164 /* Diable crossrelease for current */
5165 task->xhlocks = NULL;
5166 kfree(tmp);
5167 }
5168}
5169#endif
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index 1fd1a7543cdd..936f3d14dd6b 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -66,12 +66,8 @@ void __lockfunc __raw_##op##_lock(locktype##_t *lock) \
66 break; \ 66 break; \
67 preempt_enable(); \ 67 preempt_enable(); \
68 \ 68 \
69 if (!(lock)->break_lock) \ 69 arch_##op##_relax(&lock->raw_lock); \
70 (lock)->break_lock = 1; \
71 while ((lock)->break_lock) \
72 arch_##op##_relax(&lock->raw_lock); \
73 } \ 70 } \
74 (lock)->break_lock = 0; \
75} \ 71} \
76 \ 72 \
77unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ 73unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
@@ -86,12 +82,9 @@ unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
86 local_irq_restore(flags); \ 82 local_irq_restore(flags); \
87 preempt_enable(); \ 83 preempt_enable(); \
88 \ 84 \
89 if (!(lock)->break_lock) \ 85 arch_##op##_relax(&lock->raw_lock); \
90 (lock)->break_lock = 1; \
91 while ((lock)->break_lock) \
92 arch_##op##_relax(&lock->raw_lock); \
93 } \ 86 } \
94 (lock)->break_lock = 0; \ 87 \
95 return flags; \ 88 return flags; \
96} \ 89} \
97 \ 90 \
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 75554f366fd3..644fa2e3d993 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5097,17 +5097,6 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
5097 return ret; 5097 return ret;
5098} 5098}
5099 5099
5100/**
5101 * sys_sched_rr_get_interval - return the default timeslice of a process.
5102 * @pid: pid of the process.
5103 * @interval: userspace pointer to the timeslice value.
5104 *
5105 * this syscall writes the default timeslice value of a given process
5106 * into the user-space timespec buffer. A value of '0' means infinity.
5107 *
5108 * Return: On success, 0 and the timeslice is in @interval. Otherwise,
5109 * an error code.
5110 */
5111static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) 5100static int sched_rr_get_interval(pid_t pid, struct timespec64 *t)
5112{ 5101{
5113 struct task_struct *p; 5102 struct task_struct *p;
@@ -5144,6 +5133,17 @@ out_unlock:
5144 return retval; 5133 return retval;
5145} 5134}
5146 5135
5136/**
5137 * sys_sched_rr_get_interval - return the default timeslice of a process.
5138 * @pid: pid of the process.
5139 * @interval: userspace pointer to the timeslice value.
5140 *
5141 * this syscall writes the default timeslice value of a given process
5142 * into the user-space timespec buffer. A value of '0' means infinity.
5143 *
5144 * Return: On success, 0 and the timeslice is in @interval. Otherwise,
5145 * an error code.
5146 */
5147SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, 5147SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
5148 struct timespec __user *, interval) 5148 struct timespec __user *, interval)
5149{ 5149{
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 2f52ec0f1539..d6717a3331a1 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -244,7 +244,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
244#ifdef CONFIG_NO_HZ_COMMON 244#ifdef CONFIG_NO_HZ_COMMON
245static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) 245static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
246{ 246{
247 unsigned long idle_calls = tick_nohz_get_idle_calls(); 247 unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
248 bool ret = idle_calls == sg_cpu->saved_idle_calls; 248 bool ret = idle_calls == sg_cpu->saved_idle_calls;
249 249
250 sg_cpu->saved_idle_calls = idle_calls; 250 sg_cpu->saved_idle_calls = idle_calls;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 4056c19ca3f0..665ace2fc558 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2034,8 +2034,9 @@ static void pull_rt_task(struct rq *this_rq)
2034 bool resched = false; 2034 bool resched = false;
2035 struct task_struct *p; 2035 struct task_struct *p;
2036 struct rq *src_rq; 2036 struct rq *src_rq;
2037 int rt_overload_count = rt_overloaded(this_rq);
2037 2038
2038 if (likely(!rt_overloaded(this_rq))) 2039 if (likely(!rt_overload_count))
2039 return; 2040 return;
2040 2041
2041 /* 2042 /*
@@ -2044,6 +2045,11 @@ static void pull_rt_task(struct rq *this_rq)
2044 */ 2045 */
2045 smp_rmb(); 2046 smp_rmb();
2046 2047
2048 /* If we are the only overloaded CPU do nothing */
2049 if (rt_overload_count == 1 &&
2050 cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
2051 return;
2052
2047#ifdef HAVE_RT_PUSH_IPI 2053#ifdef HAVE_RT_PUSH_IPI
2048 if (sched_feat(RT_PUSH_IPI)) { 2054 if (sched_feat(RT_PUSH_IPI)) {
2049 tell_cpu_to_push(this_rq); 2055 tell_cpu_to_push(this_rq);
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index e776fc8cc1df..f6b5f19223d6 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -95,6 +95,7 @@ config NO_HZ_FULL
95 select RCU_NOCB_CPU 95 select RCU_NOCB_CPU
96 select VIRT_CPU_ACCOUNTING_GEN 96 select VIRT_CPU_ACCOUNTING_GEN
97 select IRQ_WORK 97 select IRQ_WORK
98 select CPU_ISOLATION
98 help 99 help
99 Adaptively try to shutdown the tick whenever possible, even when 100 Adaptively try to shutdown the tick whenever possible, even when
100 the CPU is running tasks. Typically this requires running a single 101 the CPU is running tasks. Typically this requires running a single
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 13d6881f908b..ec999f32c840 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -434,17 +434,22 @@ static struct pid *good_sigevent(sigevent_t * event)
434{ 434{
435 struct task_struct *rtn = current->group_leader; 435 struct task_struct *rtn = current->group_leader;
436 436
437 if ((event->sigev_notify & SIGEV_THREAD_ID ) && 437 switch (event->sigev_notify) {
438 (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || 438 case SIGEV_SIGNAL | SIGEV_THREAD_ID:
439 !same_thread_group(rtn, current) || 439 rtn = find_task_by_vpid(event->sigev_notify_thread_id);
440 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) 440 if (!rtn || !same_thread_group(rtn, current))
441 return NULL;
442 /* FALLTHRU */
443 case SIGEV_SIGNAL:
444 case SIGEV_THREAD:
445 if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
446 return NULL;
447 /* FALLTHRU */
448 case SIGEV_NONE:
449 return task_pid(rtn);
450 default:
441 return NULL; 451 return NULL;
442 452 }
443 if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
444 ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
445 return NULL;
446
447 return task_pid(rtn);
448} 453}
449 454
450static struct k_itimer * alloc_posix_timer(void) 455static struct k_itimer * alloc_posix_timer(void)
@@ -669,7 +674,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
669 struct timespec64 ts64; 674 struct timespec64 ts64;
670 bool sig_none; 675 bool sig_none;
671 676
672 sig_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE; 677 sig_none = timr->it_sigev_notify == SIGEV_NONE;
673 iv = timr->it_interval; 678 iv = timr->it_interval;
674 679
675 /* interval timer ? */ 680 /* interval timer ? */
@@ -856,7 +861,7 @@ int common_timer_set(struct k_itimer *timr, int flags,
856 861
857 timr->it_interval = timespec64_to_ktime(new_setting->it_interval); 862 timr->it_interval = timespec64_to_ktime(new_setting->it_interval);
858 expires = timespec64_to_ktime(new_setting->it_value); 863 expires = timespec64_to_ktime(new_setting->it_value);
859 sigev_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE; 864 sigev_none = timr->it_sigev_notify == SIGEV_NONE;
860 865
861 kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none); 866 kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
862 timr->it_active = !sigev_none; 867 timr->it_active = !sigev_none;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 99578f06c8d4..f7cc7abfcf25 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -650,6 +650,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
650 ts->next_tick = 0; 650 ts->next_tick = 0;
651} 651}
652 652
653static inline bool local_timer_softirq_pending(void)
654{
655 return local_softirq_pending() & TIMER_SOFTIRQ;
656}
657
653static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, 658static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
654 ktime_t now, int cpu) 659 ktime_t now, int cpu)
655{ 660{
@@ -666,8 +671,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
666 } while (read_seqretry(&jiffies_lock, seq)); 671 } while (read_seqretry(&jiffies_lock, seq));
667 ts->last_jiffies = basejiff; 672 ts->last_jiffies = basejiff;
668 673
669 if (rcu_needs_cpu(basemono, &next_rcu) || 674 /*
670 arch_needs_cpu() || irq_work_needs_cpu()) { 675 * Keep the periodic tick, when RCU, architecture or irq_work
676 * requests it.
677 * Aside of that check whether the local timer softirq is
678 * pending. If so its a bad idea to call get_next_timer_interrupt()
679 * because there is an already expired timer, so it will request
680 * immeditate expiry, which rearms the hardware timer with a
681 * minimal delta which brings us back to this place
682 * immediately. Lather, rinse and repeat...
683 */
684 if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
685 irq_work_needs_cpu() || local_timer_softirq_pending()) {
671 next_tick = basemono + TICK_NSEC; 686 next_tick = basemono + TICK_NSEC;
672 } else { 687 } else {
673 /* 688 /*
@@ -986,6 +1001,19 @@ ktime_t tick_nohz_get_sleep_length(void)
986} 1001}
987 1002
988/** 1003/**
1004 * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
1005 * for a particular CPU.
1006 *
1007 * Called from the schedutil frequency scaling governor in scheduler context.
1008 */
1009unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
1010{
1011 struct tick_sched *ts = tick_get_tick_sched(cpu);
1012
1013 return ts->idle_calls;
1014}
1015
1016/**
989 * tick_nohz_get_idle_calls - return the current idle calls counter value 1017 * tick_nohz_get_idle_calls - return the current idle calls counter value
990 * 1018 *
991 * Called from the schedutil frequency scaling governor in scheduler context. 1019 * Called from the schedutil frequency scaling governor in scheduler context.
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index ffebcf878fba..89a9e1b4264a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -823,11 +823,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
823 struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); 823 struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);
824 824
825 /* 825 /*
826 * If the timer is deferrable and nohz is active then we need to use 826 * If the timer is deferrable and NO_HZ_COMMON is set then we need
827 * the deferrable base. 827 * to use the deferrable base.
828 */ 828 */
829 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && 829 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
830 (tflags & TIMER_DEFERRABLE))
831 base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); 830 base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
832 return base; 831 return base;
833} 832}
@@ -837,11 +836,10 @@ static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
837 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); 836 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
838 837
839 /* 838 /*
840 * If the timer is deferrable and nohz is active then we need to use 839 * If the timer is deferrable and NO_HZ_COMMON is set then we need
841 * the deferrable base. 840 * to use the deferrable base.
842 */ 841 */
843 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && 842 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
844 (tflags & TIMER_DEFERRABLE))
845 base = this_cpu_ptr(&timer_bases[BASE_DEF]); 843 base = this_cpu_ptr(&timer_bases[BASE_DEF]);
846 return base; 844 return base;
847} 845}
@@ -1009,8 +1007,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
1009 if (!ret && (options & MOD_TIMER_PENDING_ONLY)) 1007 if (!ret && (options & MOD_TIMER_PENDING_ONLY))
1010 goto out_unlock; 1008 goto out_unlock;
1011 1009
1012 debug_activate(timer, expires);
1013
1014 new_base = get_target_base(base, timer->flags); 1010 new_base = get_target_base(base, timer->flags);
1015 1011
1016 if (base != new_base) { 1012 if (base != new_base) {
@@ -1034,6 +1030,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
1034 } 1030 }
1035 } 1031 }
1036 1032
1033 debug_activate(timer, expires);
1034
1037 timer->expires = expires; 1035 timer->expires = expires;
1038 /* 1036 /*
1039 * If 'idx' was calculated above and the base time did not advance 1037 * If 'idx' was calculated above and the base time did not advance
@@ -1684,7 +1682,7 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
1684 base->must_forward_clk = false; 1682 base->must_forward_clk = false;
1685 1683
1686 __run_timers(base); 1684 __run_timers(base);
1687 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) 1685 if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
1688 __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); 1686 __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
1689} 1687}
1690 1688
@@ -1855,6 +1853,21 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h
1855 } 1853 }
1856} 1854}
1857 1855
1856int timers_prepare_cpu(unsigned int cpu)
1857{
1858 struct timer_base *base;
1859 int b;
1860
1861 for (b = 0; b < NR_BASES; b++) {
1862 base = per_cpu_ptr(&timer_bases[b], cpu);
1863 base->clk = jiffies;
1864 base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
1865 base->is_idle = false;
1866 base->must_forward_clk = true;
1867 }
1868 return 0;
1869}
1870
1858int timers_dead_cpu(unsigned int cpu) 1871int timers_dead_cpu(unsigned int cpu)
1859{ 1872{
1860 struct timer_base *old_base; 1873 struct timer_base *old_base;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index af7dad126c13..904c952ac383 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -164,6 +164,7 @@ config PREEMPTIRQ_EVENTS
164 bool "Enable trace events for preempt and irq disable/enable" 164 bool "Enable trace events for preempt and irq disable/enable"
165 select TRACE_IRQFLAGS 165 select TRACE_IRQFLAGS
166 depends on DEBUG_PREEMPT || !PROVE_LOCKING 166 depends on DEBUG_PREEMPT || !PROVE_LOCKING
167 depends on TRACING
167 default n 168 default n
168 help 169 help
169 Enable tracing of disable and enable events for preemption and irqs. 170 Enable tracing of disable and enable events for preemption and irqs.
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0ce99c379c30..40207c2a4113 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -343,14 +343,13 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
343 .arg4_type = ARG_CONST_SIZE, 343 .arg4_type = ARG_CONST_SIZE,
344}; 344};
345 345
346static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd); 346static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
347 347
348static __always_inline u64 348static __always_inline u64
349__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, 349__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
350 u64 flags, struct perf_raw_record *raw) 350 u64 flags, struct perf_sample_data *sd)
351{ 351{
352 struct bpf_array *array = container_of(map, struct bpf_array, map); 352 struct bpf_array *array = container_of(map, struct bpf_array, map);
353 struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd);
354 unsigned int cpu = smp_processor_id(); 353 unsigned int cpu = smp_processor_id();
355 u64 index = flags & BPF_F_INDEX_MASK; 354 u64 index = flags & BPF_F_INDEX_MASK;
356 struct bpf_event_entry *ee; 355 struct bpf_event_entry *ee;
@@ -373,8 +372,6 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
373 if (unlikely(event->oncpu != cpu)) 372 if (unlikely(event->oncpu != cpu))
374 return -EOPNOTSUPP; 373 return -EOPNOTSUPP;
375 374
376 perf_sample_data_init(sd, 0, 0);
377 sd->raw = raw;
378 perf_event_output(event, sd, regs); 375 perf_event_output(event, sd, regs);
379 return 0; 376 return 0;
380} 377}
@@ -382,6 +379,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
382BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, 379BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
383 u64, flags, void *, data, u64, size) 380 u64, flags, void *, data, u64, size)
384{ 381{
382 struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
385 struct perf_raw_record raw = { 383 struct perf_raw_record raw = {
386 .frag = { 384 .frag = {
387 .size = size, 385 .size = size,
@@ -392,7 +390,10 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
392 if (unlikely(flags & ~(BPF_F_INDEX_MASK))) 390 if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
393 return -EINVAL; 391 return -EINVAL;
394 392
395 return __bpf_perf_event_output(regs, map, flags, &raw); 393 perf_sample_data_init(sd, 0, 0);
394 sd->raw = &raw;
395
396 return __bpf_perf_event_output(regs, map, flags, sd);
396} 397}
397 398
398static const struct bpf_func_proto bpf_perf_event_output_proto = { 399static const struct bpf_func_proto bpf_perf_event_output_proto = {
@@ -407,10 +408,12 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
407}; 408};
408 409
409static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); 410static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
411static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
410 412
411u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, 413u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
412 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) 414 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
413{ 415{
416 struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
414 struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); 417 struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
415 struct perf_raw_frag frag = { 418 struct perf_raw_frag frag = {
416 .copy = ctx_copy, 419 .copy = ctx_copy,
@@ -428,8 +431,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
428 }; 431 };
429 432
430 perf_fetch_caller_regs(regs); 433 perf_fetch_caller_regs(regs);
434 perf_sample_data_init(sd, 0, 0);
435 sd->raw = &raw;
431 436
432 return __bpf_perf_event_output(regs, map, flags, &raw); 437 return __bpf_perf_event_output(regs, map, flags, sd);
433} 438}
434 439
435BPF_CALL_0(bpf_get_current_task) 440BPF_CALL_0(bpf_get_current_task)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 91874a95060d..9ab18995ff1e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
280/* Missed count stored at end */ 280/* Missed count stored at end */
281#define RB_MISSED_STORED (1 << 30) 281#define RB_MISSED_STORED (1 << 30)
282 282
283#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED)
284
283struct buffer_data_page { 285struct buffer_data_page {
284 u64 time_stamp; /* page time stamp */ 286 u64 time_stamp; /* page time stamp */
285 local_t commit; /* write committed index */ 287 local_t commit; /* write committed index */
@@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
331 */ 333 */
332size_t ring_buffer_page_len(void *page) 334size_t ring_buffer_page_len(void *page)
333{ 335{
334 return local_read(&((struct buffer_data_page *)page)->commit) 336 struct buffer_data_page *bpage = page;
337
338 return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
335 + BUF_PAGE_HDR_SIZE; 339 + BUF_PAGE_HDR_SIZE;
336} 340}
337 341
@@ -1799,12 +1803,6 @@ void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
1799} 1803}
1800EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); 1804EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1801 1805
1802static __always_inline void *
1803__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
1804{
1805 return bpage->data + index;
1806}
1807
1808static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) 1806static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
1809{ 1807{
1810 return bpage->page->data + index; 1808 return bpage->page->data + index;
@@ -4406,8 +4404,13 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
4406{ 4404{
4407 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 4405 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4408 struct buffer_data_page *bpage = data; 4406 struct buffer_data_page *bpage = data;
4407 struct page *page = virt_to_page(bpage);
4409 unsigned long flags; 4408 unsigned long flags;
4410 4409
4410 /* If the page is still in use someplace else, we can't reuse it */
4411 if (page_ref_count(page) > 1)
4412 goto out;
4413
4411 local_irq_save(flags); 4414 local_irq_save(flags);
4412 arch_spin_lock(&cpu_buffer->lock); 4415 arch_spin_lock(&cpu_buffer->lock);
4413 4416
@@ -4419,6 +4422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
4419 arch_spin_unlock(&cpu_buffer->lock); 4422 arch_spin_unlock(&cpu_buffer->lock);
4420 local_irq_restore(flags); 4423 local_irq_restore(flags);
4421 4424
4425 out:
4422 free_page((unsigned long)bpage); 4426 free_page((unsigned long)bpage);
4423} 4427}
4424EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); 4428EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 73e67b68c53b..2a8d8a294345 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -362,7 +362,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct
362} 362}
363 363
364/** 364/**
365 * trace_pid_filter_add_remove - Add or remove a task from a pid_list 365 * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
366 * @pid_list: The list to modify 366 * @pid_list: The list to modify
367 * @self: The current task for fork or NULL for exit 367 * @self: The current task for fork or NULL for exit
368 * @task: The task to add or remove 368 * @task: The task to add or remove
@@ -925,7 +925,7 @@ static void tracing_snapshot_instance(struct trace_array *tr)
925} 925}
926 926
927/** 927/**
928 * trace_snapshot - take a snapshot of the current buffer. 928 * tracing_snapshot - take a snapshot of the current buffer.
929 * 929 *
930 * This causes a swap between the snapshot buffer and the current live 930 * This causes a swap between the snapshot buffer and the current live
931 * tracing buffer. You can use this to take snapshots of the live 931 * tracing buffer. You can use this to take snapshots of the live
@@ -1004,9 +1004,9 @@ int tracing_alloc_snapshot(void)
1004EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); 1004EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005 1005
1006/** 1006/**
1007 * trace_snapshot_alloc - allocate and take a snapshot of the current buffer. 1007 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008 * 1008 *
1009 * This is similar to trace_snapshot(), but it will allocate the 1009 * This is similar to tracing_snapshot(), but it will allocate the
1010 * snapshot buffer if it isn't already allocated. Use this only 1010 * snapshot buffer if it isn't already allocated. Use this only
1011 * where it is safe to sleep, as the allocation may sleep. 1011 * where it is safe to sleep, as the allocation may sleep.
1012 * 1012 *
@@ -1303,7 +1303,7 @@ unsigned long __read_mostly tracing_thresh;
1303/* 1303/*
1304 * Copy the new maximum trace into the separate maximum-trace 1304 * Copy the new maximum trace into the separate maximum-trace
1305 * structure. (this way the maximum trace is permanently saved, 1305 * structure. (this way the maximum trace is permanently saved,
1306 * for later retrieval via /sys/kernel/debug/tracing/latency_trace) 1306 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1307 */ 1307 */
1308static void 1308static void
1309__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 1309__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
@@ -2415,7 +2415,7 @@ trace_process_export(struct trace_export *export,
2415 2415
2416 entry = ring_buffer_event_data(event); 2416 entry = ring_buffer_event_data(event);
2417 size = ring_buffer_event_length(event); 2417 size = ring_buffer_event_length(event);
2418 export->write(entry, size); 2418 export->write(export, entry, size);
2419} 2419}
2420 2420
2421static DEFINE_MUTEX(ftrace_export_lock); 2421static DEFINE_MUTEX(ftrace_export_lock);
@@ -4178,37 +4178,30 @@ static const struct file_operations show_traces_fops = {
4178 .llseek = seq_lseek, 4178 .llseek = seq_lseek,
4179}; 4179};
4180 4180
4181/*
4182 * The tracer itself will not take this lock, but still we want
4183 * to provide a consistent cpumask to user-space:
4184 */
4185static DEFINE_MUTEX(tracing_cpumask_update_lock);
4186
4187/*
4188 * Temporary storage for the character representation of the
4189 * CPU bitmask (and one more byte for the newline):
4190 */
4191static char mask_str[NR_CPUS + 1];
4192
4193static ssize_t 4181static ssize_t
4194tracing_cpumask_read(struct file *filp, char __user *ubuf, 4182tracing_cpumask_read(struct file *filp, char __user *ubuf,
4195 size_t count, loff_t *ppos) 4183 size_t count, loff_t *ppos)
4196{ 4184{
4197 struct trace_array *tr = file_inode(filp)->i_private; 4185 struct trace_array *tr = file_inode(filp)->i_private;
4186 char *mask_str;
4198 int len; 4187 int len;
4199 4188
4200 mutex_lock(&tracing_cpumask_update_lock); 4189 len = snprintf(NULL, 0, "%*pb\n",
4190 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4191 mask_str = kmalloc(len, GFP_KERNEL);
4192 if (!mask_str)
4193 return -ENOMEM;
4201 4194
4202 len = snprintf(mask_str, count, "%*pb\n", 4195 len = snprintf(mask_str, len, "%*pb\n",
4203 cpumask_pr_args(tr->tracing_cpumask)); 4196 cpumask_pr_args(tr->tracing_cpumask));
4204 if (len >= count) { 4197 if (len >= count) {
4205 count = -EINVAL; 4198 count = -EINVAL;
4206 goto out_err; 4199 goto out_err;
4207 } 4200 }
4208 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); 4201 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4209 4202
4210out_err: 4203out_err:
4211 mutex_unlock(&tracing_cpumask_update_lock); 4204 kfree(mask_str);
4212 4205
4213 return count; 4206 return count;
4214} 4207}
@@ -4228,8 +4221,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4228 if (err) 4221 if (err)
4229 goto err_unlock; 4222 goto err_unlock;
4230 4223
4231 mutex_lock(&tracing_cpumask_update_lock);
4232
4233 local_irq_disable(); 4224 local_irq_disable();
4234 arch_spin_lock(&tr->max_lock); 4225 arch_spin_lock(&tr->max_lock);
4235 for_each_tracing_cpu(cpu) { 4226 for_each_tracing_cpu(cpu) {
@@ -4252,8 +4243,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4252 local_irq_enable(); 4243 local_irq_enable();
4253 4244
4254 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); 4245 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4255
4256 mutex_unlock(&tracing_cpumask_update_lock);
4257 free_cpumask_var(tracing_cpumask_new); 4246 free_cpumask_var(tracing_cpumask_new);
4258 4247
4259 return count; 4248 return count;
@@ -6780,7 +6769,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6780 .spd_release = buffer_spd_release, 6769 .spd_release = buffer_spd_release,
6781 }; 6770 };
6782 struct buffer_ref *ref; 6771 struct buffer_ref *ref;
6783 int entries, size, i; 6772 int entries, i;
6784 ssize_t ret = 0; 6773 ssize_t ret = 0;
6785 6774
6786#ifdef CONFIG_TRACER_MAX_TRACE 6775#ifdef CONFIG_TRACER_MAX_TRACE
@@ -6834,14 +6823,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6834 break; 6823 break;
6835 } 6824 }
6836 6825
6837 /*
6838 * zero out any left over data, this is going to
6839 * user land.
6840 */
6841 size = ring_buffer_page_len(ref->page);
6842 if (size < PAGE_SIZE)
6843 memset(ref->page + size, 0, PAGE_SIZE - size);
6844
6845 page = virt_to_page(ref->page); 6826 page = virt_to_page(ref->page);
6846 6827
6847 spd.pages[i] = page; 6828 spd.pages[i] = page;
@@ -7599,6 +7580,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size
7599 buf->data = alloc_percpu(struct trace_array_cpu); 7580 buf->data = alloc_percpu(struct trace_array_cpu);
7600 if (!buf->data) { 7581 if (!buf->data) {
7601 ring_buffer_free(buf->buffer); 7582 ring_buffer_free(buf->buffer);
7583 buf->buffer = NULL;
7602 return -ENOMEM; 7584 return -ENOMEM;
7603 } 7585 }
7604 7586
@@ -7622,7 +7604,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
7622 allocate_snapshot ? size : 1); 7604 allocate_snapshot ? size : 1);
7623 if (WARN_ON(ret)) { 7605 if (WARN_ON(ret)) {
7624 ring_buffer_free(tr->trace_buffer.buffer); 7606 ring_buffer_free(tr->trace_buffer.buffer);
7607 tr->trace_buffer.buffer = NULL;
7625 free_percpu(tr->trace_buffer.data); 7608 free_percpu(tr->trace_buffer.data);
7609 tr->trace_buffer.data = NULL;
7626 return -ENOMEM; 7610 return -ENOMEM;
7627 } 7611 }
7628 tr->allocated_snapshot = allocate_snapshot; 7612 tr->allocated_snapshot = allocate_snapshot;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 734accc02418..3c7bfc4bf5e9 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -209,6 +209,10 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
209 if (__this_cpu_read(disable_stack_tracer) != 1) 209 if (__this_cpu_read(disable_stack_tracer) != 1)
210 goto out; 210 goto out;
211 211
212 /* If rcu is not watching, then save stack trace can fail */
213 if (!rcu_is_watching())
214 goto out;
215
212 ip += MCOUNT_INSN_SIZE; 216 ip += MCOUNT_INSN_SIZE;
213 217
214 check_stack(ip, &stack); 218 check_stack(ip, &stack);
diff --git a/kernel/uid16.c b/kernel/uid16.c
index ce74a4901d2b..ef1da2a5f9bd 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -192,6 +192,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
192 return retval; 192 return retval;
193 } 193 }
194 194
195 groups_sort(group_info);
195 retval = set_current_groups(group_info); 196 retval = set_current_groups(group_info);
196 put_group_info(group_info); 197 put_group_info(group_info);
197 198
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8fdb710bfdd7..43d18cb46308 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -38,7 +38,6 @@
38#include <linux/hardirq.h> 38#include <linux/hardirq.h>
39#include <linux/mempolicy.h> 39#include <linux/mempolicy.h>
40#include <linux/freezer.h> 40#include <linux/freezer.h>
41#include <linux/kallsyms.h>
42#include <linux/debug_locks.h> 41#include <linux/debug_locks.h>
43#include <linux/lockdep.h> 42#include <linux/lockdep.h>
44#include <linux/idr.h> 43#include <linux/idr.h>
@@ -48,6 +47,7 @@
48#include <linux/nodemask.h> 47#include <linux/nodemask.h>
49#include <linux/moduleparam.h> 48#include <linux/moduleparam.h>
50#include <linux/uaccess.h> 49#include <linux/uaccess.h>
50#include <linux/sched/isolation.h>
51 51
52#include "workqueue_internal.h" 52#include "workqueue_internal.h"
53 53
@@ -1634,7 +1634,7 @@ static void worker_enter_idle(struct worker *worker)
1634 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); 1634 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
1635 1635
1636 /* 1636 /*
1637 * Sanity check nr_running. Because wq_unbind_fn() releases 1637 * Sanity check nr_running. Because unbind_workers() releases
1638 * pool->lock between setting %WORKER_UNBOUND and zapping 1638 * pool->lock between setting %WORKER_UNBOUND and zapping
1639 * nr_running, the warning may trigger spuriously. Check iff 1639 * nr_running, the warning may trigger spuriously. Check iff
1640 * unbind is not in progress. 1640 * unbind is not in progress.
@@ -4510,9 +4510,8 @@ void show_workqueue_state(void)
4510 * cpu comes back online. 4510 * cpu comes back online.
4511 */ 4511 */
4512 4512
4513static void wq_unbind_fn(struct work_struct *work) 4513static void unbind_workers(int cpu)
4514{ 4514{
4515 int cpu = smp_processor_id();
4516 struct worker_pool *pool; 4515 struct worker_pool *pool;
4517 struct worker *worker; 4516 struct worker *worker;
4518 4517
@@ -4589,16 +4588,6 @@ static void rebind_workers(struct worker_pool *pool)
4589 4588
4590 spin_lock_irq(&pool->lock); 4589 spin_lock_irq(&pool->lock);
4591 4590
4592 /*
4593 * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED
4594 * w/o preceding DOWN_PREPARE. Work around it. CPU hotplug is
4595 * being reworked and this can go away in time.
4596 */
4597 if (!(pool->flags & POOL_DISASSOCIATED)) {
4598 spin_unlock_irq(&pool->lock);
4599 return;
4600 }
4601
4602 pool->flags &= ~POOL_DISASSOCIATED; 4591 pool->flags &= ~POOL_DISASSOCIATED;
4603 4592
4604 for_each_pool_worker(worker, pool) { 4593 for_each_pool_worker(worker, pool) {
@@ -4709,12 +4698,13 @@ int workqueue_online_cpu(unsigned int cpu)
4709 4698
4710int workqueue_offline_cpu(unsigned int cpu) 4699int workqueue_offline_cpu(unsigned int cpu)
4711{ 4700{
4712 struct work_struct unbind_work;
4713 struct workqueue_struct *wq; 4701 struct workqueue_struct *wq;
4714 4702
4715 /* unbinding per-cpu workers should happen on the local CPU */ 4703 /* unbinding per-cpu workers should happen on the local CPU */
4716 INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); 4704 if (WARN_ON(cpu != smp_processor_id()))
4717 queue_work_on(cpu, system_highpri_wq, &unbind_work); 4705 return -1;
4706
4707 unbind_workers(cpu);
4718 4708
4719 /* update NUMA affinity of unbound workqueues */ 4709 /* update NUMA affinity of unbound workqueues */
4720 mutex_lock(&wq_pool_mutex); 4710 mutex_lock(&wq_pool_mutex);
@@ -4722,9 +4712,6 @@ int workqueue_offline_cpu(unsigned int cpu)
4722 wq_update_unbound_numa(wq, cpu, false); 4712 wq_update_unbound_numa(wq, cpu, false);
4723 mutex_unlock(&wq_pool_mutex); 4713 mutex_unlock(&wq_pool_mutex);
4724 4714
4725 /* wait for per-cpu unbinding to finish */
4726 flush_work(&unbind_work);
4727 destroy_work_on_stack(&unbind_work);
4728 return 0; 4715 return 0;
4729} 4716}
4730 4717
@@ -4957,6 +4944,10 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
4957 if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) 4944 if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
4958 return -ENOMEM; 4945 return -ENOMEM;
4959 4946
4947 /*
4948 * Not excluding isolated cpus on purpose.
4949 * If the user wishes to include them, we allow that.
4950 */
4960 cpumask_and(cpumask, cpumask, cpu_possible_mask); 4951 cpumask_and(cpumask, cpumask, cpu_possible_mask);
4961 if (!cpumask_empty(cpumask)) { 4952 if (!cpumask_empty(cpumask)) {
4962 apply_wqattrs_lock(); 4953 apply_wqattrs_lock();
@@ -5555,7 +5546,7 @@ int __init workqueue_init_early(void)
5555 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); 5546 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
5556 5547
5557 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); 5548 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
5558 cpumask_copy(wq_unbound_cpumask, cpu_possible_mask); 5549 cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_FLAG_DOMAIN));
5559 5550
5560 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); 5551 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
5561 5552
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 947d3e2ed5c2..9d5b78aad4c5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1099,8 +1099,6 @@ config PROVE_LOCKING
1099 select DEBUG_MUTEXES 1099 select DEBUG_MUTEXES
1100 select DEBUG_RT_MUTEXES if RT_MUTEXES 1100 select DEBUG_RT_MUTEXES if RT_MUTEXES
1101 select DEBUG_LOCK_ALLOC 1101 select DEBUG_LOCK_ALLOC
1102 select LOCKDEP_CROSSRELEASE
1103 select LOCKDEP_COMPLETIONS
1104 select TRACE_IRQFLAGS 1102 select TRACE_IRQFLAGS
1105 default n 1103 default n
1106 help 1104 help
@@ -1170,37 +1168,6 @@ config LOCK_STAT
1170 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. 1168 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
1171 (CONFIG_LOCKDEP defines "acquire" and "release" events.) 1169 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
1172 1170
1173config LOCKDEP_CROSSRELEASE
1174 bool
1175 help
1176 This makes lockdep work for crosslock which is a lock allowed to
1177 be released in a different context from the acquisition context.
1178 Normally a lock must be released in the context acquiring the lock.
1179 However, relexing this constraint helps synchronization primitives
1180 such as page locks or completions can use the lock correctness
1181 detector, lockdep.
1182
1183config LOCKDEP_COMPLETIONS
1184 bool
1185 help
1186 A deadlock caused by wait_for_completion() and complete() can be
1187 detected by lockdep using crossrelease feature.
1188
1189config BOOTPARAM_LOCKDEP_CROSSRELEASE_FULLSTACK
1190 bool "Enable the boot parameter, crossrelease_fullstack"
1191 depends on LOCKDEP_CROSSRELEASE
1192 default n
1193 help
1194 The lockdep "cross-release" feature needs to record stack traces
1195 (of calling functions) for all acquisitions, for eventual later
1196 use during analysis. By default only a single caller is recorded,
1197 because the unwind operation can be very expensive with deeper
1198 stack chains.
1199
1200 However a boot parameter, crossrelease_fullstack, was
1201 introduced since sometimes deeper traces are required for full
1202 analysis. This option turns on the boot parameter.
1203
1204config DEBUG_LOCKDEP 1171config DEBUG_LOCKDEP
1205 bool "Lock dependency engine debugging" 1172 bool "Lock dependency engine debugging"
1206 depends on DEBUG_KERNEL && LOCKDEP 1173 depends on DEBUG_KERNEL && LOCKDEP
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index c3e84edc47c9..2615074d3de5 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -346,7 +346,8 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj,
346static void zap_modalias_env(struct kobj_uevent_env *env) 346static void zap_modalias_env(struct kobj_uevent_env *env)
347{ 347{
348 static const char modalias_prefix[] = "MODALIAS="; 348 static const char modalias_prefix[] = "MODALIAS=";
349 int i; 349 size_t len;
350 int i, j;
350 351
351 for (i = 0; i < env->envp_idx;) { 352 for (i = 0; i < env->envp_idx;) {
352 if (strncmp(env->envp[i], modalias_prefix, 353 if (strncmp(env->envp[i], modalias_prefix,
@@ -355,11 +356,18 @@ static void zap_modalias_env(struct kobj_uevent_env *env)
355 continue; 356 continue;
356 } 357 }
357 358
358 if (i != env->envp_idx - 1) 359 len = strlen(env->envp[i]) + 1;
359 memmove(&env->envp[i], &env->envp[i + 1], 360
360 sizeof(env->envp[i]) * env->envp_idx - 1); 361 if (i != env->envp_idx - 1) {
362 memmove(env->envp[i], env->envp[i + 1],
363 env->buflen - len);
364
365 for (j = i; j < env->envp_idx - 1; j++)
366 env->envp[j] = env->envp[j + 1] - len;
367 }
361 368
362 env->envp_idx--; 369 env->envp_idx--;
370 env->buflen -= len;
363 } 371 }
364} 372}
365 373
diff --git a/lib/rbtree.c b/lib/rbtree.c
index ba4a9d165f1b..d3ff682fd4b8 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -603,6 +603,16 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new,
603} 603}
604EXPORT_SYMBOL(rb_replace_node); 604EXPORT_SYMBOL(rb_replace_node);
605 605
606void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new,
607 struct rb_root_cached *root)
608{
609 rb_replace_node(victim, new, &root->rb_root);
610
611 if (root->rb_leftmost == victim)
612 root->rb_leftmost = new;
613}
614EXPORT_SYMBOL(rb_replace_node_cached);
615
606void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new, 616void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
607 struct rb_root *root) 617 struct rb_root *root)
608{ 618{
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index aa8812ae6776..9e9748089270 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -435,6 +435,41 @@ loop:
435 return 0; 435 return 0;
436} 436}
437 437
438static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self)
439{
440 struct bpf_insn *insn;
441
442 insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL);
443 if (!insn)
444 return -ENOMEM;
445
446 /* Due to func address being non-const, we need to
447 * assemble this here.
448 */
449 insn[0] = BPF_MOV64_REG(R6, R1);
450 insn[1] = BPF_LD_ABS(BPF_B, 0);
451 insn[2] = BPF_LD_ABS(BPF_H, 0);
452 insn[3] = BPF_LD_ABS(BPF_W, 0);
453 insn[4] = BPF_MOV64_REG(R7, R6);
454 insn[5] = BPF_MOV64_IMM(R6, 0);
455 insn[6] = BPF_MOV64_REG(R1, R7);
456 insn[7] = BPF_MOV64_IMM(R2, 1);
457 insn[8] = BPF_MOV64_IMM(R3, 2);
458 insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
459 bpf_skb_vlan_push_proto.func - __bpf_call_base);
460 insn[10] = BPF_MOV64_REG(R6, R7);
461 insn[11] = BPF_LD_ABS(BPF_B, 0);
462 insn[12] = BPF_LD_ABS(BPF_H, 0);
463 insn[13] = BPF_LD_ABS(BPF_W, 0);
464 insn[14] = BPF_MOV64_IMM(R0, 42);
465 insn[15] = BPF_EXIT_INSN();
466
467 self->u.ptr.insns = insn;
468 self->u.ptr.len = 16;
469
470 return 0;
471}
472
438static int bpf_fill_jump_around_ld_abs(struct bpf_test *self) 473static int bpf_fill_jump_around_ld_abs(struct bpf_test *self)
439{ 474{
440 unsigned int len = BPF_MAXINSNS; 475 unsigned int len = BPF_MAXINSNS;
@@ -6066,6 +6101,14 @@ static struct bpf_test tests[] = {
6066 {}, 6101 {},
6067 { {0x1, 0x42 } }, 6102 { {0x1, 0x42 } },
6068 }, 6103 },
6104 {
6105 "LD_ABS with helper changing skb data",
6106 { },
6107 INTERNAL,
6108 { 0x34 },
6109 { { ETH_HLEN, 42 } },
6110 .fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
6111 },
6069}; 6112};
6070 6113
6071static struct net_device dev; 6114static struct net_device dev;
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index 4a720ed4fdaf..0d54bcbc8170 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -33,8 +33,9 @@
33 * @head: head of timerqueue 33 * @head: head of timerqueue
34 * @node: timer node to be added 34 * @node: timer node to be added
35 * 35 *
36 * Adds the timer node to the timerqueue, sorted by the 36 * Adds the timer node to the timerqueue, sorted by the node's expires
37 * node's expires value. 37 * value. Returns true if the newly added timer is the first expiring timer in
38 * the queue.
38 */ 39 */
39bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) 40bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
40{ 41{
@@ -70,7 +71,8 @@ EXPORT_SYMBOL_GPL(timerqueue_add);
70 * @head: head of timerqueue 71 * @head: head of timerqueue
71 * @node: timer node to be removed 72 * @node: timer node to be removed
72 * 73 *
73 * Removes the timer node from the timerqueue. 74 * Removes the timer node from the timerqueue. Returns true if the queue is
75 * not empty after the remove.
74 */ 76 */
75bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) 77bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
76{ 78{
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 84b2dc76f140..b5f940ce0143 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -882,13 +882,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
882 if (IS_ERR(dev)) 882 if (IS_ERR(dev))
883 return PTR_ERR(dev); 883 return PTR_ERR(dev);
884 884
885 if (bdi_debug_register(bdi, dev_name(dev))) {
886 device_destroy(bdi_class, dev->devt);
887 return -ENOMEM;
888 }
889 cgwb_bdi_register(bdi); 885 cgwb_bdi_register(bdi);
890 bdi->dev = dev; 886 bdi->dev = dev;
891 887
888 bdi_debug_register(bdi, dev_name(dev));
892 set_bit(WB_registered, &bdi->wb.state); 889 set_bit(WB_registered, &bdi->wb.state);
893 890
894 spin_lock_bh(&bdi_lock); 891 spin_lock_bh(&bdi_lock);
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
index d04ac1ec0559..1826f191e72c 100644
--- a/mm/early_ioremap.c
+++ b/mm/early_ioremap.c
@@ -111,7 +111,7 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
111 enum fixed_addresses idx; 111 enum fixed_addresses idx;
112 int i, slot; 112 int i, slot;
113 113
114 WARN_ON(system_state != SYSTEM_BOOTING); 114 WARN_ON(system_state >= SYSTEM_RUNNING);
115 115
116 slot = -1; 116 slot = -1;
117 for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { 117 for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index 297c7238f7d4..c64dca6e27c2 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -62,8 +62,10 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
62 * get_user_pages_longterm() and disallow it for filesystem-dax 62 * get_user_pages_longterm() and disallow it for filesystem-dax
63 * mappings. 63 * mappings.
64 */ 64 */
65 if (vma_is_fsdax(vma)) 65 if (vma_is_fsdax(vma)) {
66 return -EOPNOTSUPP; 66 ret = -EOPNOTSUPP;
67 goto out;
68 }
67 69
68 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) { 70 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
69 vec->got_ref = true; 71 vec->got_ref = true;
diff --git a/mm/gup.c b/mm/gup.c
index d3fb60e5bfac..e0d82b6706d7 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -66,7 +66,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
66 */ 66 */
67static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) 67static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
68{ 68{
69 return pte_access_permitted(pte, WRITE) || 69 return pte_write(pte) ||
70 ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); 70 ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
71} 71}
72 72
diff --git a/mm/hmm.c b/mm/hmm.c
index 3a5c172af560..ea19742a5d60 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -391,11 +391,11 @@ again:
391 if (pmd_protnone(pmd)) 391 if (pmd_protnone(pmd))
392 return hmm_vma_walk_clear(start, end, walk); 392 return hmm_vma_walk_clear(start, end, walk);
393 393
394 if (!pmd_access_permitted(pmd, write_fault)) 394 if (write_fault && !pmd_write(pmd))
395 return hmm_vma_walk_clear(start, end, walk); 395 return hmm_vma_walk_clear(start, end, walk);
396 396
397 pfn = pmd_pfn(pmd) + pte_index(addr); 397 pfn = pmd_pfn(pmd) + pte_index(addr);
398 flag |= pmd_access_permitted(pmd, WRITE) ? HMM_PFN_WRITE : 0; 398 flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
399 for (; addr < end; addr += PAGE_SIZE, i++, pfn++) 399 for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
400 pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag; 400 pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
401 return 0; 401 return 0;
@@ -456,11 +456,11 @@ again:
456 continue; 456 continue;
457 } 457 }
458 458
459 if (!pte_access_permitted(pte, write_fault)) 459 if (write_fault && !pte_write(pte))
460 goto fault; 460 goto fault;
461 461
462 pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag; 462 pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
463 pfns[i] |= pte_access_permitted(pte, WRITE) ? HMM_PFN_WRITE : 0; 463 pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
464 continue; 464 continue;
465 465
466fault: 466fault:
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2f2f5e774902..0e7ded98d114 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -870,7 +870,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
870 */ 870 */
871 WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); 871 WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
872 872
873 if (!pmd_access_permitted(*pmd, flags & FOLL_WRITE)) 873 if (flags & FOLL_WRITE && !pmd_write(*pmd))
874 return NULL; 874 return NULL;
875 875
876 if (pmd_present(*pmd) && pmd_devmap(*pmd)) 876 if (pmd_present(*pmd) && pmd_devmap(*pmd))
@@ -1012,7 +1012,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
1012 1012
1013 assert_spin_locked(pud_lockptr(mm, pud)); 1013 assert_spin_locked(pud_lockptr(mm, pud));
1014 1014
1015 if (!pud_access_permitted(*pud, flags & FOLL_WRITE)) 1015 if (flags & FOLL_WRITE && !pud_write(*pud))
1016 return NULL; 1016 return NULL;
1017 1017
1018 if (pud_present(*pud) && pud_devmap(*pud)) 1018 if (pud_present(*pud) && pud_devmap(*pud))
@@ -1386,7 +1386,7 @@ out_unlock:
1386 */ 1386 */
1387static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags) 1387static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
1388{ 1388{
1389 return pmd_access_permitted(pmd, WRITE) || 1389 return pmd_write(pmd) ||
1390 ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd)); 1390 ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
1391} 1391}
1392 1392
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 3d4781756d50..d73c14294f3a 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1523,7 +1523,7 @@ static void kmemleak_scan(void)
1523 if (page_count(page) == 0) 1523 if (page_count(page) == 0)
1524 continue; 1524 continue;
1525 scan_block(page, page + 1, NULL); 1525 scan_block(page, page + 1, NULL);
1526 if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page)))) 1526 if (!(pfn & 63))
1527 cond_resched(); 1527 cond_resched();
1528 } 1528 }
1529 } 1529 }
diff --git a/mm/memory.c b/mm/memory.c
index 5eb3d2524bdc..ca5674cbaff2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3831,7 +3831,8 @@ static inline int create_huge_pmd(struct vm_fault *vmf)
3831 return VM_FAULT_FALLBACK; 3831 return VM_FAULT_FALLBACK;
3832} 3832}
3833 3833
3834static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) 3834/* `inline' is required to avoid gcc 4.1.2 build error */
3835static inline int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
3835{ 3836{
3836 if (vma_is_anonymous(vmf->vma)) 3837 if (vma_is_anonymous(vmf->vma))
3837 return do_huge_pmd_wp_page(vmf, orig_pmd); 3838 return do_huge_pmd_wp_page(vmf, orig_pmd);
@@ -3948,7 +3949,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
3948 if (unlikely(!pte_same(*vmf->pte, entry))) 3949 if (unlikely(!pte_same(*vmf->pte, entry)))
3949 goto unlock; 3950 goto unlock;
3950 if (vmf->flags & FAULT_FLAG_WRITE) { 3951 if (vmf->flags & FAULT_FLAG_WRITE) {
3951 if (!pte_access_permitted(entry, WRITE)) 3952 if (!pte_write(entry))
3952 return do_wp_page(vmf); 3953 return do_wp_page(vmf);
3953 entry = pte_mkdirty(entry); 3954 entry = pte_mkdirty(entry);
3954 } 3955 }
@@ -4013,7 +4014,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
4013 4014
4014 /* NUMA case for anonymous PUDs would go here */ 4015 /* NUMA case for anonymous PUDs would go here */
4015 4016
4016 if (dirty && !pud_access_permitted(orig_pud, WRITE)) { 4017 if (dirty && !pud_write(orig_pud)) {
4017 ret = wp_huge_pud(&vmf, orig_pud); 4018 ret = wp_huge_pud(&vmf, orig_pud);
4018 if (!(ret & VM_FAULT_FALLBACK)) 4019 if (!(ret & VM_FAULT_FALLBACK))
4019 return ret; 4020 return ret;
@@ -4046,7 +4047,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
4046 if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) 4047 if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
4047 return do_huge_pmd_numa_page(&vmf, orig_pmd); 4048 return do_huge_pmd_numa_page(&vmf, orig_pmd);
4048 4049
4049 if (dirty && !pmd_access_permitted(orig_pmd, WRITE)) { 4050 if (dirty && !pmd_write(orig_pmd)) {
4050 ret = wp_huge_pmd(&vmf, orig_pmd); 4051 ret = wp_huge_pmd(&vmf, orig_pmd);
4051 if (!(ret & VM_FAULT_FALLBACK)) 4052 if (!(ret & VM_FAULT_FALLBACK))
4052 return ret; 4053 return ret;
@@ -4336,7 +4337,7 @@ int follow_phys(struct vm_area_struct *vma,
4336 goto out; 4337 goto out;
4337 pte = *ptep; 4338 pte = *ptep;
4338 4339
4339 if (!pte_access_permitted(pte, flags & FOLL_WRITE)) 4340 if ((flags & FOLL_WRITE) && !pte_write(pte))
4340 goto unlock; 4341 goto unlock;
4341 4342
4342 *prot = pgprot_val(pte_pgprot(pte)); 4343 *prot = pgprot_val(pte_pgprot(pte));
diff --git a/mm/mmap.c b/mm/mmap.c
index a4d546821214..9efdc021ad22 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3019,20 +3019,20 @@ void exit_mmap(struct mm_struct *mm)
3019 /* Use -1 here to ensure all VMAs in the mm are unmapped */ 3019 /* Use -1 here to ensure all VMAs in the mm are unmapped */
3020 unmap_vmas(&tlb, vma, 0, -1); 3020 unmap_vmas(&tlb, vma, 0, -1);
3021 3021
3022 set_bit(MMF_OOM_SKIP, &mm->flags); 3022 if (unlikely(mm_is_oom_victim(mm))) {
3023 if (unlikely(tsk_is_oom_victim(current))) {
3024 /* 3023 /*
3025 * Wait for oom_reap_task() to stop working on this 3024 * Wait for oom_reap_task() to stop working on this
3026 * mm. Because MMF_OOM_SKIP is already set before 3025 * mm. Because MMF_OOM_SKIP is already set before
3027 * calling down_read(), oom_reap_task() will not run 3026 * calling down_read(), oom_reap_task() will not run
3028 * on this "mm" post up_write(). 3027 * on this "mm" post up_write().
3029 * 3028 *
3030 * tsk_is_oom_victim() cannot be set from under us 3029 * mm_is_oom_victim() cannot be set from under us
3031 * either because current->mm is already set to NULL 3030 * either because victim->mm is already set to NULL
3032 * under task_lock before calling mmput and oom_mm is 3031 * under task_lock before calling mmput and oom_mm is
3033 * set not NULL by the OOM killer only if current->mm 3032 * set not NULL by the OOM killer only if victim->mm
3034 * is found not NULL while holding the task_lock. 3033 * is found not NULL while holding the task_lock.
3035 */ 3034 */
3035 set_bit(MMF_OOM_SKIP, &mm->flags);
3036 down_write(&mm->mmap_sem); 3036 down_write(&mm->mmap_sem);
3037 up_write(&mm->mmap_sem); 3037 up_write(&mm->mmap_sem);
3038 } 3038 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index c957be32b27a..29f855551efe 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -683,8 +683,10 @@ static void mark_oom_victim(struct task_struct *tsk)
683 return; 683 return;
684 684
685 /* oom_mm is bound to the signal struct life time. */ 685 /* oom_mm is bound to the signal struct life time. */
686 if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) 686 if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
687 mmgrab(tsk->signal->oom_mm); 687 mmgrab(tsk->signal->oom_mm);
688 set_bit(MMF_OOM_VICTIM, &mm->flags);
689 }
688 690
689 /* 691 /*
690 * Make sure that the task is woken up from uninterruptible sleep 692 * Make sure that the task is woken up from uninterruptible sleep
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 73f5d4556b3d..7e5e775e97f4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2684,6 +2684,7 @@ void free_unref_page_list(struct list_head *list)
2684{ 2684{
2685 struct page *page, *next; 2685 struct page *page, *next;
2686 unsigned long flags, pfn; 2686 unsigned long flags, pfn;
2687 int batch_count = 0;
2687 2688
2688 /* Prepare pages for freeing */ 2689 /* Prepare pages for freeing */
2689 list_for_each_entry_safe(page, next, list, lru) { 2690 list_for_each_entry_safe(page, next, list, lru) {
@@ -2700,6 +2701,16 @@ void free_unref_page_list(struct list_head *list)
2700 set_page_private(page, 0); 2701 set_page_private(page, 0);
2701 trace_mm_page_free_batched(page); 2702 trace_mm_page_free_batched(page);
2702 free_unref_page_commit(page, pfn); 2703 free_unref_page_commit(page, pfn);
2704
2705 /*
2706 * Guard against excessive IRQ disabled times when we get
2707 * a large list of pages to free.
2708 */
2709 if (++batch_count == SWAP_CLUSTER_MAX) {
2710 local_irq_restore(flags);
2711 batch_count = 0;
2712 local_irq_save(flags);
2713 }
2703 } 2714 }
2704 local_irq_restore(flags); 2715 local_irq_restore(flags);
2705} 2716}
diff --git a/mm/percpu.c b/mm/percpu.c
index 79e3549cab0f..50e7fdf84055 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2719,7 +2719,11 @@ void __init setup_per_cpu_areas(void)
2719 2719
2720 if (pcpu_setup_first_chunk(ai, fc) < 0) 2720 if (pcpu_setup_first_chunk(ai, fc) < 0)
2721 panic("Failed to initialize percpu areas."); 2721 panic("Failed to initialize percpu areas.");
2722#ifdef CONFIG_CRIS
2723#warning "the CRIS architecture has physical and virtual addresses confused"
2724#else
2722 pcpu_free_alloc_info(ai); 2725 pcpu_free_alloc_info(ai);
2726#endif
2723} 2727}
2724 2728
2725#endif /* CONFIG_SMP */ 2729#endif /* CONFIG_SMP */
diff --git a/mm/slab.c b/mm/slab.c
index 183e996dde5f..4e51ef954026 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1584,11 +1584,8 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1584 *dbg_redzone2(cachep, objp)); 1584 *dbg_redzone2(cachep, objp));
1585 } 1585 }
1586 1586
1587 if (cachep->flags & SLAB_STORE_USER) { 1587 if (cachep->flags & SLAB_STORE_USER)
1588 pr_err("Last user: [<%p>](%pSR)\n", 1588 pr_err("Last user: (%pSR)\n", *dbg_userword(cachep, objp));
1589 *dbg_userword(cachep, objp),
1590 *dbg_userword(cachep, objp));
1591 }
1592 realobj = (char *)objp + obj_offset(cachep); 1589 realobj = (char *)objp + obj_offset(cachep);
1593 size = cachep->object_size; 1590 size = cachep->object_size;
1594 for (i = 0; i < size && lines; i += 16, lines--) { 1591 for (i = 0; i < size && lines; i += 16, lines--) {
@@ -1621,7 +1618,7 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1621 /* Mismatch ! */ 1618 /* Mismatch ! */
1622 /* Print header */ 1619 /* Print header */
1623 if (lines == 0) { 1620 if (lines == 0) {
1624 pr_err("Slab corruption (%s): %s start=%p, len=%d\n", 1621 pr_err("Slab corruption (%s): %s start=%px, len=%d\n",
1625 print_tainted(), cachep->name, 1622 print_tainted(), cachep->name,
1626 realobj, size); 1623 realobj, size);
1627 print_objinfo(cachep, objp, 0); 1624 print_objinfo(cachep, objp, 0);
@@ -1650,13 +1647,13 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1650 if (objnr) { 1647 if (objnr) {
1651 objp = index_to_obj(cachep, page, objnr - 1); 1648 objp = index_to_obj(cachep, page, objnr - 1);
1652 realobj = (char *)objp + obj_offset(cachep); 1649 realobj = (char *)objp + obj_offset(cachep);
1653 pr_err("Prev obj: start=%p, len=%d\n", realobj, size); 1650 pr_err("Prev obj: start=%px, len=%d\n", realobj, size);
1654 print_objinfo(cachep, objp, 2); 1651 print_objinfo(cachep, objp, 2);
1655 } 1652 }
1656 if (objnr + 1 < cachep->num) { 1653 if (objnr + 1 < cachep->num) {
1657 objp = index_to_obj(cachep, page, objnr + 1); 1654 objp = index_to_obj(cachep, page, objnr + 1);
1658 realobj = (char *)objp + obj_offset(cachep); 1655 realobj = (char *)objp + obj_offset(cachep);
1659 pr_err("Next obj: start=%p, len=%d\n", realobj, size); 1656 pr_err("Next obj: start=%px, len=%d\n", realobj, size);
1660 print_objinfo(cachep, objp, 2); 1657 print_objinfo(cachep, objp, 2);
1661 } 1658 }
1662 } 1659 }
@@ -2608,7 +2605,7 @@ static void slab_put_obj(struct kmem_cache *cachep,
2608 /* Verify double free bug */ 2605 /* Verify double free bug */
2609 for (i = page->active; i < cachep->num; i++) { 2606 for (i = page->active; i < cachep->num; i++) {
2610 if (get_free_obj(page, i) == objnr) { 2607 if (get_free_obj(page, i) == objnr) {
2611 pr_err("slab: double free detected in cache '%s', objp %p\n", 2608 pr_err("slab: double free detected in cache '%s', objp %px\n",
2612 cachep->name, objp); 2609 cachep->name, objp);
2613 BUG(); 2610 BUG();
2614 } 2611 }
@@ -2772,7 +2769,7 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2772 else 2769 else
2773 slab_error(cache, "memory outside object was overwritten"); 2770 slab_error(cache, "memory outside object was overwritten");
2774 2771
2775 pr_err("%p: redzone 1:0x%llx, redzone 2:0x%llx\n", 2772 pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",
2776 obj, redzone1, redzone2); 2773 obj, redzone1, redzone2);
2777} 2774}
2778 2775
@@ -3078,7 +3075,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3078 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || 3075 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
3079 *dbg_redzone2(cachep, objp) != RED_INACTIVE) { 3076 *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
3080 slab_error(cachep, "double free, or memory outside object was overwritten"); 3077 slab_error(cachep, "double free, or memory outside object was overwritten");
3081 pr_err("%p: redzone 1:0x%llx, redzone 2:0x%llx\n", 3078 pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",
3082 objp, *dbg_redzone1(cachep, objp), 3079 objp, *dbg_redzone1(cachep, objp),
3083 *dbg_redzone2(cachep, objp)); 3080 *dbg_redzone2(cachep, objp));
3084 } 3081 }
@@ -3091,7 +3088,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3091 cachep->ctor(objp); 3088 cachep->ctor(objp);
3092 if (ARCH_SLAB_MINALIGN && 3089 if (ARCH_SLAB_MINALIGN &&
3093 ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) { 3090 ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
3094 pr_err("0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", 3091 pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n",
3095 objp, (int)ARCH_SLAB_MINALIGN); 3092 objp, (int)ARCH_SLAB_MINALIGN);
3096 } 3093 }
3097 return objp; 3094 return objp;
@@ -4283,7 +4280,7 @@ static void show_symbol(struct seq_file *m, unsigned long address)
4283 return; 4280 return;
4284 } 4281 }
4285#endif 4282#endif
4286 seq_printf(m, "%p", (void *)address); 4283 seq_printf(m, "%px", (void *)address);
4287} 4284}
4288 4285
4289static int leaks_show(struct seq_file *m, void *p) 4286static int leaks_show(struct seq_file *m, void *p)
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 1b659ab652fb..bbe8414b6ee7 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1214,7 +1214,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1214 orig_node->last_seen = jiffies; 1214 orig_node->last_seen = jiffies;
1215 1215
1216 /* find packet count of corresponding one hop neighbor */ 1216 /* find packet count of corresponding one hop neighbor */
1217 spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); 1217 spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
1218 if_num = if_incoming->if_num; 1218 if_num = if_incoming->if_num;
1219 orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num]; 1219 orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
1220 neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); 1220 neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
@@ -1224,7 +1224,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1224 } else { 1224 } else {
1225 neigh_rq_count = 0; 1225 neigh_rq_count = 0;
1226 } 1226 }
1227 spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); 1227 spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
1228 1228
1229 /* pay attention to not get a value bigger than 100 % */ 1229 /* pay attention to not get a value bigger than 100 % */
1230 if (orig_eq_count > neigh_rq_count) 1230 if (orig_eq_count > neigh_rq_count)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 341ceab8338d..e0e2bfcd6b3e 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -814,7 +814,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
814 } 814 }
815 815
816 orig_gw = batadv_gw_node_get(bat_priv, orig_node); 816 orig_gw = batadv_gw_node_get(bat_priv, orig_node);
817 if (!orig_node) 817 if (!orig_gw)
818 goto out; 818 goto out;
819 819
820 if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0) 820 if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0)
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index a98cf1104a30..ebe6e38934e4 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -499,6 +499,8 @@ int batadv_frag_send_packet(struct sk_buff *skb,
499 */ 499 */
500 if (skb->priority >= 256 && skb->priority <= 263) 500 if (skb->priority >= 256 && skb->priority <= 263)
501 frag_header.priority = skb->priority - 256; 501 frag_header.priority = skb->priority - 256;
502 else
503 frag_header.priority = 0;
502 504
503 ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr); 505 ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
504 ether_addr_copy(frag_header.dest, orig_node->orig); 506 ether_addr_copy(frag_header.dest, orig_node->orig);
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 15cd2139381e..ebc4e2241c77 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -482,7 +482,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
482 482
483/** 483/**
484 * batadv_tp_sender_timeout - timer that fires in case of packet loss 484 * batadv_tp_sender_timeout - timer that fires in case of packet loss
485 * @arg: address of the related tp_vars 485 * @t: address to timer_list inside tp_vars
486 * 486 *
487 * If fired it means that there was packet loss. 487 * If fired it means that there was packet loss.
488 * Switch to Slow Start, set the ss_threshold to half of the current cwnd and 488 * Switch to Slow Start, set the ss_threshold to half of the current cwnd and
@@ -1106,7 +1106,7 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
1106/** 1106/**
1107 * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is 1107 * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is
1108 * reached without received ack 1108 * reached without received ack
1109 * @arg: address of the related tp_vars 1109 * @t: address to timer_list inside tp_vars
1110 */ 1110 */
1111static void batadv_tp_receiver_shutdown(struct timer_list *t) 1111static void batadv_tp_receiver_shutdown(struct timer_list *t)
1112{ 1112{
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index d0ef0a8e8831..015f465c514b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
1262 struct net_bridge *br = netdev_priv(dev); 1262 struct net_bridge *br = netdev_priv(dev);
1263 int err; 1263 int err;
1264 1264
1265 err = register_netdevice(dev);
1266 if (err)
1267 return err;
1268
1265 if (tb[IFLA_ADDRESS]) { 1269 if (tb[IFLA_ADDRESS]) {
1266 spin_lock_bh(&br->lock); 1270 spin_lock_bh(&br->lock);
1267 br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); 1271 br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
1268 spin_unlock_bh(&br->lock); 1272 spin_unlock_bh(&br->lock);
1269 } 1273 }
1270 1274
1271 err = register_netdevice(dev);
1272 if (err)
1273 return err;
1274
1275 err = br_changelink(dev, tb, data, extack); 1275 err = br_changelink(dev, tb, data, extack);
1276 if (err) 1276 if (err)
1277 unregister_netdevice(dev); 1277 br_dev_delete(dev, NULL);
1278
1278 return err; 1279 return err;
1279} 1280}
1280 1281
diff --git a/net/core/dev.c b/net/core/dev.c
index f47e96b62308..01ee854454a8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3904,7 +3904,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3904 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, 3904 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
3905 troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) 3905 troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
3906 goto do_drop; 3906 goto do_drop;
3907 if (troom > 0 && __skb_linearize(skb)) 3907 if (skb_linearize(skb))
3908 goto do_drop; 3908 goto do_drop;
3909 } 3909 }
3910 3910
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b797832565d3..60a71be75aea 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
267 spin_lock_bh(&net->nsid_lock); 267 spin_lock_bh(&net->nsid_lock);
268 peer = idr_find(&net->netns_ids, id); 268 peer = idr_find(&net->netns_ids, id);
269 if (peer) 269 if (peer)
270 get_net(peer); 270 peer = maybe_get_net(peer);
271 spin_unlock_bh(&net->nsid_lock); 271 spin_unlock_bh(&net->nsid_lock);
272 rcu_read_unlock(); 272 rcu_read_unlock();
273 273
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 1c4810919a0a..b9057478d69c 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -14,7 +14,6 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/types.h> 16#include <linux/types.h>
17#include <linux/module.h>
18#include <linux/string.h> 17#include <linux/string.h>
19#include <linux/errno.h> 18#include <linux/errno.h>
20#include <linux/skbuff.h> 19#include <linux/skbuff.h>
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6b0ff396fa9d..08f574081315 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1177,12 +1177,12 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
1177 int i, new_frags; 1177 int i, new_frags;
1178 u32 d_off; 1178 u32 d_off;
1179 1179
1180 if (!num_frags)
1181 return 0;
1182
1183 if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) 1180 if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
1184 return -EINVAL; 1181 return -EINVAL;
1185 1182
1183 if (!num_frags)
1184 goto release;
1185
1186 new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT; 1186 new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
1187 for (i = 0; i < new_frags; i++) { 1187 for (i = 0; i < new_frags; i++) {
1188 page = alloc_page(gfp_mask); 1188 page = alloc_page(gfp_mask);
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
1238 __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); 1238 __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
1239 skb_shinfo(skb)->nr_frags = new_frags; 1239 skb_shinfo(skb)->nr_frags = new_frags;
1240 1240
1241release:
1241 skb_zcopy_clear(skb, false); 1242 skb_zcopy_clear(skb, false);
1242 return 0; 1243 return 0;
1243} 1244}
@@ -3654,8 +3655,6 @@ normal:
3654 3655
3655 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & 3656 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
3656 SKBTX_SHARED_FRAG; 3657 SKBTX_SHARED_FRAG;
3657 if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
3658 goto err;
3659 3658
3660 while (pos < offset + len) { 3659 while (pos < offset + len) {
3661 if (i >= nfrags) { 3660 if (i >= nfrags) {
@@ -3681,6 +3680,8 @@ normal:
3681 3680
3682 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) 3681 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
3683 goto err; 3682 goto err;
3683 if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
3684 goto err;
3684 3685
3685 *nskb_frag = *frag; 3686 *nskb_frag = *frag;
3686 __skb_frag_ref(nskb_frag); 3687 __skb_frag_ref(nskb_frag);
@@ -4293,7 +4294,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
4293 struct sock *sk = skb->sk; 4294 struct sock *sk = skb->sk;
4294 4295
4295 if (!skb_may_tx_timestamp(sk, false)) 4296 if (!skb_may_tx_timestamp(sk, false))
4296 return; 4297 goto err;
4297 4298
4298 /* Take a reference to prevent skb_orphan() from freeing the socket, 4299 /* Take a reference to prevent skb_orphan() from freeing the socket,
4299 * but only if the socket refcount is not zero. 4300 * but only if the socket refcount is not zero.
@@ -4302,7 +4303,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
4302 *skb_hwtstamps(skb) = *hwtstamps; 4303 *skb_hwtstamps(skb) = *hwtstamps;
4303 __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); 4304 __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
4304 sock_put(sk); 4305 sock_put(sk);
4306 return;
4305 } 4307 }
4308
4309err:
4310 kfree_skb(skb);
4306} 4311}
4307EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); 4312EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
4308 4313
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d6e7a642493b..a95a55f79137 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -16,7 +16,6 @@
16#include <linux/of_net.h> 16#include <linux/of_net.h>
17#include <linux/of_mdio.h> 17#include <linux/of_mdio.h>
18#include <linux/mdio.h> 18#include <linux/mdio.h>
19#include <linux/list.h>
20#include <net/rtnetlink.h> 19#include <net/rtnetlink.h>
21#include <net/pkt_cls.h> 20#include <net/pkt_cls.h>
22#include <net/tc_act/tc_mirred.h> 21#include <net/tc_act/tc_mirred.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a4573bccd6da..7a93359fbc72 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1428,7 +1428,7 @@ skip:
1428 1428
1429static bool inetdev_valid_mtu(unsigned int mtu) 1429static bool inetdev_valid_mtu(unsigned int mtu)
1430{ 1430{
1431 return mtu >= 68; 1431 return mtu >= IPV4_MIN_MTU;
1432} 1432}
1433 1433
1434static void inetdev_send_gratuitous_arp(struct net_device *dev, 1434static void inetdev_send_gratuitous_arp(struct net_device *dev,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f52d27a422c3..08259d078b1c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1298,14 +1298,19 @@ err_table_hash_alloc:
1298 1298
1299static void ip_fib_net_exit(struct net *net) 1299static void ip_fib_net_exit(struct net *net)
1300{ 1300{
1301 unsigned int i; 1301 int i;
1302 1302
1303 rtnl_lock(); 1303 rtnl_lock();
1304#ifdef CONFIG_IP_MULTIPLE_TABLES 1304#ifdef CONFIG_IP_MULTIPLE_TABLES
1305 RCU_INIT_POINTER(net->ipv4.fib_main, NULL); 1305 RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
1306 RCU_INIT_POINTER(net->ipv4.fib_default, NULL); 1306 RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
1307#endif 1307#endif
1308 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1308 /* Destroy the tables in reverse order to guarantee that the
1309 * local table, ID 255, is destroyed before the main table, ID
1310 * 254. This is necessary as the local table may contain
1311 * references to data contained in the main table.
1312 */
1313 for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
1309 struct hlist_head *head = &net->ipv4.fib_table_hash[i]; 1314 struct hlist_head *head = &net->ipv4.fib_table_hash[i];
1310 struct hlist_node *tmp; 1315 struct hlist_node *tmp;
1311 struct fib_table *tb; 1316 struct fib_table *tb;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f04d944f8abe..c586597da20d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
698 698
699 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 699 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
700 int type = nla_type(nla); 700 int type = nla_type(nla);
701 u32 val; 701 u32 fi_val, val;
702 702
703 if (!type) 703 if (!type)
704 continue; 704 continue;
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
715 val = nla_get_u32(nla); 715 val = nla_get_u32(nla);
716 } 716 }
717 717
718 if (fi->fib_metrics->metrics[type - 1] != val) 718 fi_val = fi->fib_metrics->metrics[type - 1];
719 if (type == RTAX_FEATURES)
720 fi_val &= ~DST_FEATURE_ECN_CA;
721
722 if (fi_val != val)
719 return false; 723 return false;
720 } 724 }
721 725
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d1f8f302dbf3..726f6b608274 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -89,6 +89,7 @@
89#include <linux/rtnetlink.h> 89#include <linux/rtnetlink.h>
90#include <linux/times.h> 90#include <linux/times.h>
91#include <linux/pkt_sched.h> 91#include <linux/pkt_sched.h>
92#include <linux/byteorder/generic.h>
92 93
93#include <net/net_namespace.h> 94#include <net/net_namespace.h>
94#include <net/arp.h> 95#include <net/arp.h>
@@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
321 return scount; 322 return scount;
322} 323}
323 324
325/* source address selection per RFC 3376 section 4.2.13 */
326static __be32 igmpv3_get_srcaddr(struct net_device *dev,
327 const struct flowi4 *fl4)
328{
329 struct in_device *in_dev = __in_dev_get_rcu(dev);
330
331 if (!in_dev)
332 return htonl(INADDR_ANY);
333
334 for_ifa(in_dev) {
335 if (inet_ifa_match(fl4->saddr, ifa))
336 return fl4->saddr;
337 } endfor_ifa(in_dev);
338
339 return htonl(INADDR_ANY);
340}
341
324static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) 342static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
325{ 343{
326 struct sk_buff *skb; 344 struct sk_buff *skb;
@@ -368,7 +386,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
368 pip->frag_off = htons(IP_DF); 386 pip->frag_off = htons(IP_DF);
369 pip->ttl = 1; 387 pip->ttl = 1;
370 pip->daddr = fl4.daddr; 388 pip->daddr = fl4.daddr;
371 pip->saddr = fl4.saddr; 389 pip->saddr = igmpv3_get_srcaddr(dev, &fl4);
372 pip->protocol = IPPROTO_IGMP; 390 pip->protocol = IPPROTO_IGMP;
373 pip->tot_len = 0; /* filled in later */ 391 pip->tot_len = 0; /* filled in later */
374 ip_select_ident(net, skb, NULL); 392 ip_select_ident(net, skb, NULL);
@@ -404,16 +422,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
404} 422}
405 423
406static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, 424static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
407 int type, struct igmpv3_grec **ppgr) 425 int type, struct igmpv3_grec **ppgr, unsigned int mtu)
408{ 426{
409 struct net_device *dev = pmc->interface->dev; 427 struct net_device *dev = pmc->interface->dev;
410 struct igmpv3_report *pih; 428 struct igmpv3_report *pih;
411 struct igmpv3_grec *pgr; 429 struct igmpv3_grec *pgr;
412 430
413 if (!skb) 431 if (!skb) {
414 skb = igmpv3_newpack(dev, dev->mtu); 432 skb = igmpv3_newpack(dev, mtu);
415 if (!skb) 433 if (!skb)
416 return NULL; 434 return NULL;
435 }
417 pgr = skb_put(skb, sizeof(struct igmpv3_grec)); 436 pgr = skb_put(skb, sizeof(struct igmpv3_grec));
418 pgr->grec_type = type; 437 pgr->grec_type = type;
419 pgr->grec_auxwords = 0; 438 pgr->grec_auxwords = 0;
@@ -436,12 +455,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
436 struct igmpv3_grec *pgr = NULL; 455 struct igmpv3_grec *pgr = NULL;
437 struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; 456 struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
438 int scount, stotal, first, isquery, truncate; 457 int scount, stotal, first, isquery, truncate;
458 unsigned int mtu;
439 459
440 if (pmc->multiaddr == IGMP_ALL_HOSTS) 460 if (pmc->multiaddr == IGMP_ALL_HOSTS)
441 return skb; 461 return skb;
442 if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) 462 if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
443 return skb; 463 return skb;
444 464
465 mtu = READ_ONCE(dev->mtu);
466 if (mtu < IPV4_MIN_MTU)
467 return skb;
468
445 isquery = type == IGMPV3_MODE_IS_INCLUDE || 469 isquery = type == IGMPV3_MODE_IS_INCLUDE ||
446 type == IGMPV3_MODE_IS_EXCLUDE; 470 type == IGMPV3_MODE_IS_EXCLUDE;
447 truncate = type == IGMPV3_MODE_IS_EXCLUDE || 471 truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
@@ -462,7 +486,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
462 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { 486 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
463 if (skb) 487 if (skb)
464 igmpv3_sendpack(skb); 488 igmpv3_sendpack(skb);
465 skb = igmpv3_newpack(dev, dev->mtu); 489 skb = igmpv3_newpack(dev, mtu);
466 } 490 }
467 } 491 }
468 first = 1; 492 first = 1;
@@ -498,12 +522,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
498 pgr->grec_nsrcs = htons(scount); 522 pgr->grec_nsrcs = htons(scount);
499 if (skb) 523 if (skb)
500 igmpv3_sendpack(skb); 524 igmpv3_sendpack(skb);
501 skb = igmpv3_newpack(dev, dev->mtu); 525 skb = igmpv3_newpack(dev, mtu);
502 first = 1; 526 first = 1;
503 scount = 0; 527 scount = 0;
504 } 528 }
505 if (first) { 529 if (first) {
506 skb = add_grhead(skb, pmc, type, &pgr); 530 skb = add_grhead(skb, pmc, type, &pgr, mtu);
507 first = 0; 531 first = 0;
508 } 532 }
509 if (!skb) 533 if (!skb)
@@ -538,7 +562,7 @@ empty_source:
538 igmpv3_sendpack(skb); 562 igmpv3_sendpack(skb);
539 skb = NULL; /* add_grhead will get a new one */ 563 skb = NULL; /* add_grhead will get a new one */
540 } 564 }
541 skb = add_grhead(skb, pmc, type, &pgr); 565 skb = add_grhead(skb, pmc, type, &pgr, mtu);
542 } 566 }
543 } 567 }
544 if (pgr) 568 if (pgr)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index bb6239169b1a..45ffd3d045d2 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -266,7 +266,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
266 len = gre_hdr_len + sizeof(*ershdr); 266 len = gre_hdr_len + sizeof(*ershdr);
267 267
268 if (unlikely(!pskb_may_pull(skb, len))) 268 if (unlikely(!pskb_may_pull(skb, len)))
269 return -ENOMEM; 269 return PACKET_REJECT;
270 270
271 iph = ip_hdr(skb); 271 iph = ip_hdr(skb);
272 ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len); 272 ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
@@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = {
1310static void ipgre_tap_setup(struct net_device *dev) 1310static void ipgre_tap_setup(struct net_device *dev)
1311{ 1311{
1312 ether_setup(dev); 1312 ether_setup(dev);
1313 dev->max_mtu = 0;
1313 dev->netdev_ops = &gre_tap_netdev_ops; 1314 dev->netdev_ops = &gre_tap_netdev_ops;
1314 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1315 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1315 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1316 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index fe6fee728ce4..5ddb1cb52bd4 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -349,8 +349,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
349 dev->needed_headroom = t_hlen + hlen; 349 dev->needed_headroom = t_hlen + hlen;
350 mtu -= (dev->hard_header_len + t_hlen); 350 mtu -= (dev->hard_header_len + t_hlen);
351 351
352 if (mtu < 68) 352 if (mtu < IPV4_MIN_MTU)
353 mtu = 68; 353 mtu = IPV4_MIN_MTU;
354 354
355 return mtu; 355 return mtu;
356} 356}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index f88221aebc9d..0c3c944a7b72 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -373,7 +373,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
373 if (!xt_find_jump_offset(offsets, newpos, 373 if (!xt_find_jump_offset(offsets, newpos,
374 newinfo->number)) 374 newinfo->number))
375 return 0; 375 return 0;
376 e = entry0 + newpos;
377 } else { 376 } else {
378 /* ... this is a fallthru */ 377 /* ... this is a fallthru */
379 newpos = pos + e->next_offset; 378 newpos = pos + e->next_offset;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4cbe5e80f3bf..2e0d339028bb 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -439,7 +439,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
439 if (!xt_find_jump_offset(offsets, newpos, 439 if (!xt_find_jump_offset(offsets, newpos,
440 newinfo->number)) 440 newinfo->number))
441 return 0; 441 return 0;
442 e = entry0 + newpos;
443 } else { 442 } else {
444 /* ... this is a fallthru */ 443 /* ... this is a fallthru */
445 newpos = pos + e->next_offset; 444 newpos = pos + e->next_offset;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 17b4ca562944..69060e3abe85 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -813,12 +813,13 @@ static int clusterip_net_init(struct net *net)
813 813
814static void clusterip_net_exit(struct net *net) 814static void clusterip_net_exit(struct net *net)
815{ 815{
816#ifdef CONFIG_PROC_FS
817 struct clusterip_net *cn = net_generic(net, clusterip_net_id); 816 struct clusterip_net *cn = net_generic(net, clusterip_net_id);
817#ifdef CONFIG_PROC_FS
818 proc_remove(cn->procdir); 818 proc_remove(cn->procdir);
819 cn->procdir = NULL; 819 cn->procdir = NULL;
820#endif 820#endif
821 nf_unregister_net_hook(net, &cip_arp_ops); 821 nf_unregister_net_hook(net, &cip_arp_ops);
822 WARN_ON_ONCE(!list_empty(&cn->configs));
822} 823}
823 824
824static struct pernet_operations clusterip_net_ops = { 825static struct pernet_operations clusterip_net_ops = {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 33b70bfd1122..125c1eab3eaa 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -513,11 +513,16 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
513 int err; 513 int err;
514 struct ip_options_data opt_copy; 514 struct ip_options_data opt_copy;
515 struct raw_frag_vec rfv; 515 struct raw_frag_vec rfv;
516 int hdrincl;
516 517
517 err = -EMSGSIZE; 518 err = -EMSGSIZE;
518 if (len > 0xFFFF) 519 if (len > 0xFFFF)
519 goto out; 520 goto out;
520 521
522 /* hdrincl should be READ_ONCE(inet->hdrincl)
523 * but READ_ONCE() doesn't work with bit fields
524 */
525 hdrincl = inet->hdrincl;
521 /* 526 /*
522 * Check the flags. 527 * Check the flags.
523 */ 528 */
@@ -593,7 +598,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
593 /* Linux does not mangle headers on raw sockets, 598 /* Linux does not mangle headers on raw sockets,
594 * so that IP options + IP_HDRINCL is non-sense. 599 * so that IP options + IP_HDRINCL is non-sense.
595 */ 600 */
596 if (inet->hdrincl) 601 if (hdrincl)
597 goto done; 602 goto done;
598 if (ipc.opt->opt.srr) { 603 if (ipc.opt->opt.srr) {
599 if (!daddr) 604 if (!daddr)
@@ -615,12 +620,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
615 620
616 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, 621 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
617 RT_SCOPE_UNIVERSE, 622 RT_SCOPE_UNIVERSE,
618 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, 623 hdrincl ? IPPROTO_RAW : sk->sk_protocol,
619 inet_sk_flowi_flags(sk) | 624 inet_sk_flowi_flags(sk) |
620 (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), 625 (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
621 daddr, saddr, 0, 0, sk->sk_uid); 626 daddr, saddr, 0, 0, sk->sk_uid);
622 627
623 if (!inet->hdrincl) { 628 if (!hdrincl) {
624 rfv.msg = msg; 629 rfv.msg = msg;
625 rfv.hlen = 0; 630 rfv.hlen = 0;
626 631
@@ -645,7 +650,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
645 goto do_confirm; 650 goto do_confirm;
646back_from_confirm: 651back_from_confirm:
647 652
648 if (inet->hdrincl) 653 if (hdrincl)
649 err = raw_send_hdrinc(sk, &fl4, msg, len, 654 err = raw_send_hdrinc(sk, &fl4, msg, len,
650 &rt, msg->msg_flags, &ipc.sockc); 655 &rt, msg->msg_flags, &ipc.sockc);
651 656
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9550cc42de2d..45f750e85714 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -508,9 +508,6 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
508 u32 new_sample = tp->rcv_rtt_est.rtt_us; 508 u32 new_sample = tp->rcv_rtt_est.rtt_us;
509 long m = sample; 509 long m = sample;
510 510
511 if (m == 0)
512 m = 1;
513
514 if (new_sample != 0) { 511 if (new_sample != 0) {
515 /* If we sample in larger samples in the non-timestamp 512 /* If we sample in larger samples in the non-timestamp
516 * case, we could grossly overestimate the RTT especially 513 * case, we could grossly overestimate the RTT especially
@@ -547,6 +544,8 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
547 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) 544 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
548 return; 545 return;
549 delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time); 546 delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
547 if (!delta_us)
548 delta_us = 1;
550 tcp_rcv_rtt_update(tp, delta_us, 1); 549 tcp_rcv_rtt_update(tp, delta_us, 1);
551 550
552new_measure: 551new_measure:
@@ -563,8 +562,11 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
563 (TCP_SKB_CB(skb)->end_seq - 562 (TCP_SKB_CB(skb)->end_seq -
564 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) { 563 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
565 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; 564 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
566 u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); 565 u32 delta_us;
567 566
567 if (!delta)
568 delta = 1;
569 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
568 tcp_rcv_rtt_update(tp, delta_us, 0); 570 tcp_rcv_rtt_update(tp, delta_us, 0);
569 } 571 }
570} 572}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 77ea45da0fe9..94e28350f420 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -848,7 +848,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
848 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 848 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
849 req->ts_recent, 849 req->ts_recent,
850 0, 850 0,
851 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, 851 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
852 AF_INET), 852 AF_INET),
853 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, 853 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
854 ip_hdr(skb)->tos); 854 ip_hdr(skb)->tos);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 16df6dd44b98..968fda198376 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -264,6 +264,7 @@ void tcp_delack_timer_handler(struct sock *sk)
264 icsk->icsk_ack.pingpong = 0; 264 icsk->icsk_ack.pingpong = 0;
265 icsk->icsk_ack.ato = TCP_ATO_MIN; 265 icsk->icsk_ack.ato = TCP_ATO_MIN;
266 } 266 }
267 tcp_mstamp_refresh(tcp_sk(sk));
267 tcp_send_ack(sk); 268 tcp_send_ack(sk);
268 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); 269 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
269 } 270 }
@@ -632,6 +633,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
632 goto out; 633 goto out;
633 } 634 }
634 635
636 tcp_mstamp_refresh(tp);
635 if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { 637 if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
636 if (tp->linger2 >= 0) { 638 if (tp->linger2 >= 0) {
637 const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; 639 const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index e50b7fea57ee..bcfc00e88756 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
23 return xfrm4_extract_header(skb); 23 return xfrm4_extract_header(skb);
24} 24}
25 25
26static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
27 struct sk_buff *skb)
28{
29 return dst_input(skb);
30}
31
26static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, 32static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
27 struct sk_buff *skb) 33 struct sk_buff *skb)
28{ 34{
@@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
33 iph->tos, skb->dev)) 39 iph->tos, skb->dev))
34 goto drop; 40 goto drop;
35 } 41 }
36 return dst_input(skb); 42
43 if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2))
44 goto drop;
45
46 return 0;
37drop: 47drop:
38 kfree_skb(skb); 48 kfree_skb(skb);
39 return NET_RX_DROP; 49 return NET_RX_DROP;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c26f71234b9c..c9441ca45399 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,6 @@ lookup_protocol:
210 np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; 210 np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
211 np->mc_loop = 1; 211 np->mc_loop = 1;
212 np->pmtudisc = IPV6_PMTUDISC_WANT; 212 np->pmtudisc = IPV6_PMTUDISC_WANT;
213 np->autoflowlabel = ip6_default_np_autolabel(net);
214 np->repflow = net->ipv6.sysctl.flowlabel_reflect; 213 np->repflow = net->ipv6.sysctl.flowlabel_reflect;
215 sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; 214 sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
216 215
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4cfd8e0696fe..772695960890 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1014,6 +1014,36 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
1014 eth_random_addr(dev->perm_addr); 1014 eth_random_addr(dev->perm_addr);
1015} 1015}
1016 1016
1017#define GRE6_FEATURES (NETIF_F_SG | \
1018 NETIF_F_FRAGLIST | \
1019 NETIF_F_HIGHDMA | \
1020 NETIF_F_HW_CSUM)
1021
1022static void ip6gre_tnl_init_features(struct net_device *dev)
1023{
1024 struct ip6_tnl *nt = netdev_priv(dev);
1025
1026 dev->features |= GRE6_FEATURES;
1027 dev->hw_features |= GRE6_FEATURES;
1028
1029 if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
1030 /* TCP offload with GRE SEQ is not supported, nor
1031 * can we support 2 levels of outer headers requiring
1032 * an update.
1033 */
1034 if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
1035 nt->encap.type == TUNNEL_ENCAP_NONE) {
1036 dev->features |= NETIF_F_GSO_SOFTWARE;
1037 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1038 }
1039
1040 /* Can use a lockless transmit, unless we generate
1041 * output sequences
1042 */
1043 dev->features |= NETIF_F_LLTX;
1044 }
1045}
1046
1017static int ip6gre_tunnel_init_common(struct net_device *dev) 1047static int ip6gre_tunnel_init_common(struct net_device *dev)
1018{ 1048{
1019 struct ip6_tnl *tunnel; 1049 struct ip6_tnl *tunnel;
@@ -1048,6 +1078,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
1048 if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1078 if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1049 dev->mtu -= 8; 1079 dev->mtu -= 8;
1050 1080
1081 ip6gre_tnl_init_features(dev);
1082
1051 return 0; 1083 return 0;
1052} 1084}
1053 1085
@@ -1298,16 +1330,12 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
1298 .ndo_get_iflink = ip6_tnl_get_iflink, 1330 .ndo_get_iflink = ip6_tnl_get_iflink,
1299}; 1331};
1300 1332
1301#define GRE6_FEATURES (NETIF_F_SG | \
1302 NETIF_F_FRAGLIST | \
1303 NETIF_F_HIGHDMA | \
1304 NETIF_F_HW_CSUM)
1305
1306static void ip6gre_tap_setup(struct net_device *dev) 1333static void ip6gre_tap_setup(struct net_device *dev)
1307{ 1334{
1308 1335
1309 ether_setup(dev); 1336 ether_setup(dev);
1310 1337
1338 dev->max_mtu = 0;
1311 dev->netdev_ops = &ip6gre_tap_netdev_ops; 1339 dev->netdev_ops = &ip6gre_tap_netdev_ops;
1312 dev->needs_free_netdev = true; 1340 dev->needs_free_netdev = true;
1313 dev->priv_destructor = ip6gre_dev_free; 1341 dev->priv_destructor = ip6gre_dev_free;
@@ -1382,26 +1410,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
1382 nt->net = dev_net(dev); 1410 nt->net = dev_net(dev);
1383 ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); 1411 ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
1384 1412
1385 dev->features |= GRE6_FEATURES;
1386 dev->hw_features |= GRE6_FEATURES;
1387
1388 if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
1389 /* TCP offload with GRE SEQ is not supported, nor
1390 * can we support 2 levels of outer headers requiring
1391 * an update.
1392 */
1393 if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
1394 (nt->encap.type == TUNNEL_ENCAP_NONE)) {
1395 dev->features |= NETIF_F_GSO_SOFTWARE;
1396 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1397 }
1398
1399 /* Can use a lockless transmit, unless we generate
1400 * output sequences
1401 */
1402 dev->features |= NETIF_F_LLTX;
1403 }
1404
1405 err = register_netdevice(dev); 1413 err = register_netdevice(dev);
1406 if (err) 1414 if (err)
1407 goto out; 1415 goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5110a418cc4d..f7dd51c42314 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
166 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 166 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
167} 167}
168 168
169static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
170{
171 if (!np->autoflowlabel_set)
172 return ip6_default_np_autolabel(net);
173 else
174 return np->autoflowlabel;
175}
176
169/* 177/*
170 * xmit an sk_buff (used by TCP, SCTP and DCCP) 178 * xmit an sk_buff (used by TCP, SCTP and DCCP)
171 * Note : socket lock is not held for SYNACK packets, but might be modified 179 * Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
230 hlimit = ip6_dst_hoplimit(dst); 238 hlimit = ip6_dst_hoplimit(dst);
231 239
232 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, 240 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
233 np->autoflowlabel, fl6)); 241 ip6_autoflowlabel(net, np), fl6));
234 242
235 hdr->payload_len = htons(seg_len); 243 hdr->payload_len = htons(seg_len);
236 hdr->nexthdr = proto; 244 hdr->nexthdr = proto;
@@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
1626 1634
1627 ip6_flow_hdr(hdr, v6_cork->tclass, 1635 ip6_flow_hdr(hdr, v6_cork->tclass,
1628 ip6_make_flowlabel(net, skb, fl6->flowlabel, 1636 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1629 np->autoflowlabel, fl6)); 1637 ip6_autoflowlabel(net, np), fl6));
1630 hdr->hop_limit = v6_cork->hop_limit; 1638 hdr->hop_limit = v6_cork->hop_limit;
1631 hdr->nexthdr = proto; 1639 hdr->nexthdr = proto;
1632 hdr->saddr = fl6->saddr; 1640 hdr->saddr = fl6->saddr;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index db84f523656d..931c38f6ff4a 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1123,8 +1123,13 @@ route_lookup:
1123 max_headroom += 8; 1123 max_headroom += 8;
1124 mtu -= 8; 1124 mtu -= 8;
1125 } 1125 }
1126 if (mtu < IPV6_MIN_MTU) 1126 if (skb->protocol == htons(ETH_P_IPV6)) {
1127 mtu = IPV6_MIN_MTU; 1127 if (mtu < IPV6_MIN_MTU)
1128 mtu = IPV6_MIN_MTU;
1129 } else if (mtu < 576) {
1130 mtu = 576;
1131 }
1132
1128 if (skb_dst(skb) && !t->parms.collect_md) 1133 if (skb_dst(skb) && !t->parms.collect_md)
1129 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 1134 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
1130 if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { 1135 if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b9404feabd78..2d4680e0376f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -886,6 +886,7 @@ pref_skip_coa:
886 break; 886 break;
887 case IPV6_AUTOFLOWLABEL: 887 case IPV6_AUTOFLOWLABEL:
888 np->autoflowlabel = valbool; 888 np->autoflowlabel = valbool;
889 np->autoflowlabel_set = 1;
889 retv = 0; 890 retv = 0;
890 break; 891 break;
891 case IPV6_RECVFRAGSIZE: 892 case IPV6_RECVFRAGSIZE:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fc6d7d143f2c..844642682b83 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1682,16 +1682,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
1682} 1682}
1683 1683
1684static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, 1684static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1685 int type, struct mld2_grec **ppgr) 1685 int type, struct mld2_grec **ppgr, unsigned int mtu)
1686{ 1686{
1687 struct net_device *dev = pmc->idev->dev;
1688 struct mld2_report *pmr; 1687 struct mld2_report *pmr;
1689 struct mld2_grec *pgr; 1688 struct mld2_grec *pgr;
1690 1689
1691 if (!skb) 1690 if (!skb) {
1692 skb = mld_newpack(pmc->idev, dev->mtu); 1691 skb = mld_newpack(pmc->idev, mtu);
1693 if (!skb) 1692 if (!skb)
1694 return NULL; 1693 return NULL;
1694 }
1695 pgr = skb_put(skb, sizeof(struct mld2_grec)); 1695 pgr = skb_put(skb, sizeof(struct mld2_grec));
1696 pgr->grec_type = type; 1696 pgr->grec_type = type;
1697 pgr->grec_auxwords = 0; 1697 pgr->grec_auxwords = 0;
@@ -1714,10 +1714,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1714 struct mld2_grec *pgr = NULL; 1714 struct mld2_grec *pgr = NULL;
1715 struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; 1715 struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
1716 int scount, stotal, first, isquery, truncate; 1716 int scount, stotal, first, isquery, truncate;
1717 unsigned int mtu;
1717 1718
1718 if (pmc->mca_flags & MAF_NOREPORT) 1719 if (pmc->mca_flags & MAF_NOREPORT)
1719 return skb; 1720 return skb;
1720 1721
1722 mtu = READ_ONCE(dev->mtu);
1723 if (mtu < IPV6_MIN_MTU)
1724 return skb;
1725
1721 isquery = type == MLD2_MODE_IS_INCLUDE || 1726 isquery = type == MLD2_MODE_IS_INCLUDE ||
1722 type == MLD2_MODE_IS_EXCLUDE; 1727 type == MLD2_MODE_IS_EXCLUDE;
1723 truncate = type == MLD2_MODE_IS_EXCLUDE || 1728 truncate = type == MLD2_MODE_IS_EXCLUDE ||
@@ -1738,7 +1743,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1738 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { 1743 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
1739 if (skb) 1744 if (skb)
1740 mld_sendpack(skb); 1745 mld_sendpack(skb);
1741 skb = mld_newpack(idev, dev->mtu); 1746 skb = mld_newpack(idev, mtu);
1742 } 1747 }
1743 } 1748 }
1744 first = 1; 1749 first = 1;
@@ -1774,12 +1779,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1774 pgr->grec_nsrcs = htons(scount); 1779 pgr->grec_nsrcs = htons(scount);
1775 if (skb) 1780 if (skb)
1776 mld_sendpack(skb); 1781 mld_sendpack(skb);
1777 skb = mld_newpack(idev, dev->mtu); 1782 skb = mld_newpack(idev, mtu);
1778 first = 1; 1783 first = 1;
1779 scount = 0; 1784 scount = 0;
1780 } 1785 }
1781 if (first) { 1786 if (first) {
1782 skb = add_grhead(skb, pmc, type, &pgr); 1787 skb = add_grhead(skb, pmc, type, &pgr, mtu);
1783 first = 0; 1788 first = 0;
1784 } 1789 }
1785 if (!skb) 1790 if (!skb)
@@ -1814,7 +1819,7 @@ empty_source:
1814 mld_sendpack(skb); 1819 mld_sendpack(skb);
1815 skb = NULL; /* add_grhead will get a new one */ 1820 skb = NULL; /* add_grhead will get a new one */
1816 } 1821 }
1817 skb = add_grhead(skb, pmc, type, &pgr); 1822 skb = add_grhead(skb, pmc, type, &pgr, mtu);
1818 } 1823 }
1819 } 1824 }
1820 if (pgr) 1825 if (pgr)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f06e25065a34..1d7ae9366335 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -458,7 +458,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
458 if (!xt_find_jump_offset(offsets, newpos, 458 if (!xt_find_jump_offset(offsets, newpos,
459 newinfo->number)) 459 newinfo->number))
460 return 0; 460 return 0;
461 e = entry0 + newpos;
462 } else { 461 } else {
463 /* ... this is a fallthru */ 462 /* ... this is a fallthru */
464 newpos = pos + e->next_offset; 463 newpos = pos + e->next_offset;
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 2b1a15846f9a..92c0047e7e33 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -33,13 +33,19 @@ static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
33 33
34 if (range->flags & NF_NAT_RANGE_MAP_IPS) 34 if (range->flags & NF_NAT_RANGE_MAP_IPS)
35 return -EINVAL; 35 return -EINVAL;
36 return 0; 36 return nf_ct_netns_get(par->net, par->family);
37}
38
39static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par)
40{
41 nf_ct_netns_put(par->net, par->family);
37} 42}
38 43
39static struct xt_target masquerade_tg6_reg __read_mostly = { 44static struct xt_target masquerade_tg6_reg __read_mostly = {
40 .name = "MASQUERADE", 45 .name = "MASQUERADE",
41 .family = NFPROTO_IPV6, 46 .family = NFPROTO_IPV6,
42 .checkentry = masquerade_tg6_checkentry, 47 .checkentry = masquerade_tg6_checkentry,
48 .destroy = masquerade_tg6_destroy,
43 .target = masquerade_tg6, 49 .target = masquerade_tg6,
44 .targetsize = sizeof(struct nf_nat_range), 50 .targetsize = sizeof(struct nf_nat_range),
45 .table = "nat", 51 .table = "nat",
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7a8d1500d374..0458b761f3c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2336 } 2336 }
2337 2337
2338 rt->dst.flags |= DST_HOST; 2338 rt->dst.flags |= DST_HOST;
2339 rt->dst.input = ip6_input;
2339 rt->dst.output = ip6_output; 2340 rt->dst.output = ip6_output;
2340 rt->rt6i_gateway = fl6->daddr; 2341 rt->rt6i_gateway = fl6->daddr;
2341 rt->rt6i_dst.addr = fl6->daddr; 2342 rt->rt6i_dst.addr = fl6->daddr;
@@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4297 if (!ipv6_addr_any(&fl6.saddr)) 4298 if (!ipv6_addr_any(&fl6.saddr))
4298 flags |= RT6_LOOKUP_F_HAS_SADDR; 4299 flags |= RT6_LOOKUP_F_HAS_SADDR;
4299 4300
4300 if (!fibmatch) 4301 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
4301 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
4302 else
4303 dst = ip6_route_lookup(net, &fl6, 0);
4304 4302
4305 rcu_read_unlock(); 4303 rcu_read_unlock();
4306 } else { 4304 } else {
4307 fl6.flowi6_oif = oif; 4305 fl6.flowi6_oif = oif;
4308 4306
4309 if (!fibmatch) 4307 dst = ip6_route_output(net, NULL, &fl6);
4310 dst = ip6_route_output(net, NULL, &fl6);
4311 else
4312 dst = ip6_route_lookup(net, &fl6, 0);
4313 } 4308 }
4314 4309
4315 4310
@@ -4326,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4326 goto errout; 4321 goto errout;
4327 } 4322 }
4328 4323
4324 if (fibmatch && rt->dst.from) {
4325 struct rt6_info *ort = container_of(rt->dst.from,
4326 struct rt6_info, dst);
4327
4328 dst_hold(&ort->dst);
4329 ip6_rt_put(rt);
4330 rt = ort;
4331 }
4332
4329 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 4333 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
4330 if (!skb) { 4334 if (!skb) {
4331 ip6_rt_put(rt); 4335 ip6_rt_put(rt);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1f04ec0e4a7a..7178476b3d2f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -994,7 +994,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
994 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 994 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
995 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 995 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
996 req->ts_recent, sk->sk_bound_dev_if, 996 req->ts_recent, sk->sk_bound_dev_if,
997 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 997 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
998 0, 0); 998 0, 0);
999} 999}
1000 1000
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index fe04e23af986..841f4a07438e 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
32} 32}
33EXPORT_SYMBOL(xfrm6_rcv_spi); 33EXPORT_SYMBOL(xfrm6_rcv_spi);
34 34
35static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
36 struct sk_buff *skb)
37{
38 if (xfrm_trans_queue(skb, ip6_rcv_finish))
39 __kfree_skb(skb);
40 return -1;
41}
42
35int xfrm6_transport_finish(struct sk_buff *skb, int async) 43int xfrm6_transport_finish(struct sk_buff *skb, int async)
36{ 44{
37 struct xfrm_offload *xo = xfrm_offload(skb); 45 struct xfrm_offload *xo = xfrm_offload(skb);
@@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
56 64
57 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, 65 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
58 dev_net(skb->dev), NULL, skb, skb->dev, NULL, 66 dev_net(skb->dev), NULL, skb, skb->dev, NULL,
59 ip6_rcv_finish); 67 xfrm6_transport_finish2);
60 return -1; 68 return -1;
61} 69}
62 70
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 167f83b853e6..1621b6ab17ba 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -291,16 +291,15 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
291 int i; 291 int i;
292 292
293 mutex_lock(&sta->ampdu_mlme.mtx); 293 mutex_lock(&sta->ampdu_mlme.mtx);
294 for (i = 0; i < IEEE80211_NUM_TIDS; i++) { 294 for (i = 0; i < IEEE80211_NUM_TIDS; i++)
295 ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, 295 ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
296 WLAN_REASON_QSTA_LEAVE_QBSS, 296 WLAN_REASON_QSTA_LEAVE_QBSS,
297 reason != AGG_STOP_DESTROY_STA && 297 reason != AGG_STOP_DESTROY_STA &&
298 reason != AGG_STOP_PEER_REQUEST); 298 reason != AGG_STOP_PEER_REQUEST);
299 }
300 mutex_unlock(&sta->ampdu_mlme.mtx);
301 299
302 for (i = 0; i < IEEE80211_NUM_TIDS; i++) 300 for (i = 0; i < IEEE80211_NUM_TIDS; i++)
303 ___ieee80211_stop_tx_ba_session(sta, i, reason); 301 ___ieee80211_stop_tx_ba_session(sta, i, reason);
302 mutex_unlock(&sta->ampdu_mlme.mtx);
304 303
305 /* stopping might queue the work again - so cancel only afterwards */ 304 /* stopping might queue the work again - so cancel only afterwards */
306 cancel_work_sync(&sta->ampdu_mlme.work); 305 cancel_work_sync(&sta->ampdu_mlme.work);
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index cf1bf2605c10..dc6347342e34 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -103,7 +103,6 @@ struct bitstr {
103#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;} 103#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
104#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;} 104#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
105#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;} 105#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
106#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
107static unsigned int get_len(struct bitstr *bs); 106static unsigned int get_len(struct bitstr *bs);
108static unsigned int get_bit(struct bitstr *bs); 107static unsigned int get_bit(struct bitstr *bs);
109static unsigned int get_bits(struct bitstr *bs, unsigned int b); 108static unsigned int get_bits(struct bitstr *bs, unsigned int b);
@@ -165,6 +164,19 @@ static unsigned int get_len(struct bitstr *bs)
165 return v; 164 return v;
166} 165}
167 166
167static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits)
168{
169 bits += bs->bit;
170 bytes += bits / BITS_PER_BYTE;
171 if (bits % BITS_PER_BYTE > 0)
172 bytes++;
173
174 if (*bs->cur + bytes > *bs->end)
175 return 1;
176
177 return 0;
178}
179
168/****************************************************************************/ 180/****************************************************************************/
169static unsigned int get_bit(struct bitstr *bs) 181static unsigned int get_bit(struct bitstr *bs)
170{ 182{
@@ -279,8 +291,8 @@ static int decode_bool(struct bitstr *bs, const struct field_t *f,
279 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 291 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
280 292
281 INC_BIT(bs); 293 INC_BIT(bs);
282 294 if (nf_h323_error_boundary(bs, 0, 0))
283 CHECK_BOUND(bs, 0); 295 return H323_ERROR_BOUND;
284 return H323_ERROR_NONE; 296 return H323_ERROR_NONE;
285} 297}
286 298
@@ -293,11 +305,14 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f,
293 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 305 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
294 306
295 BYTE_ALIGN(bs); 307 BYTE_ALIGN(bs);
296 CHECK_BOUND(bs, 1); 308 if (nf_h323_error_boundary(bs, 1, 0))
309 return H323_ERROR_BOUND;
310
297 len = *bs->cur++; 311 len = *bs->cur++;
298 bs->cur += len; 312 bs->cur += len;
313 if (nf_h323_error_boundary(bs, 0, 0))
314 return H323_ERROR_BOUND;
299 315
300 CHECK_BOUND(bs, 0);
301 return H323_ERROR_NONE; 316 return H323_ERROR_NONE;
302} 317}
303 318
@@ -319,6 +334,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
319 bs->cur += 2; 334 bs->cur += 2;
320 break; 335 break;
321 case CONS: /* 64K < Range < 4G */ 336 case CONS: /* 64K < Range < 4G */
337 if (nf_h323_error_boundary(bs, 0, 2))
338 return H323_ERROR_BOUND;
322 len = get_bits(bs, 2) + 1; 339 len = get_bits(bs, 2) + 1;
323 BYTE_ALIGN(bs); 340 BYTE_ALIGN(bs);
324 if (base && (f->attr & DECODE)) { /* timeToLive */ 341 if (base && (f->attr & DECODE)) { /* timeToLive */
@@ -330,7 +347,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
330 break; 347 break;
331 case UNCO: 348 case UNCO:
332 BYTE_ALIGN(bs); 349 BYTE_ALIGN(bs);
333 CHECK_BOUND(bs, 2); 350 if (nf_h323_error_boundary(bs, 2, 0))
351 return H323_ERROR_BOUND;
334 len = get_len(bs); 352 len = get_len(bs);
335 bs->cur += len; 353 bs->cur += len;
336 break; 354 break;
@@ -341,7 +359,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
341 359
342 PRINT("\n"); 360 PRINT("\n");
343 361
344 CHECK_BOUND(bs, 0); 362 if (nf_h323_error_boundary(bs, 0, 0))
363 return H323_ERROR_BOUND;
345 return H323_ERROR_NONE; 364 return H323_ERROR_NONE;
346} 365}
347 366
@@ -357,7 +376,8 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f,
357 INC_BITS(bs, f->sz); 376 INC_BITS(bs, f->sz);
358 } 377 }
359 378
360 CHECK_BOUND(bs, 0); 379 if (nf_h323_error_boundary(bs, 0, 0))
380 return H323_ERROR_BOUND;
361 return H323_ERROR_NONE; 381 return H323_ERROR_NONE;
362} 382}
363 383
@@ -375,12 +395,14 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
375 len = f->lb; 395 len = f->lb;
376 break; 396 break;
377 case WORD: /* 2-byte length */ 397 case WORD: /* 2-byte length */
378 CHECK_BOUND(bs, 2); 398 if (nf_h323_error_boundary(bs, 2, 0))
399 return H323_ERROR_BOUND;
379 len = (*bs->cur++) << 8; 400 len = (*bs->cur++) << 8;
380 len += (*bs->cur++) + f->lb; 401 len += (*bs->cur++) + f->lb;
381 break; 402 break;
382 case SEMI: 403 case SEMI:
383 CHECK_BOUND(bs, 2); 404 if (nf_h323_error_boundary(bs, 2, 0))
405 return H323_ERROR_BOUND;
384 len = get_len(bs); 406 len = get_len(bs);
385 break; 407 break;
386 default: 408 default:
@@ -391,7 +413,8 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
391 bs->cur += len >> 3; 413 bs->cur += len >> 3;
392 bs->bit = len & 7; 414 bs->bit = len & 7;
393 415
394 CHECK_BOUND(bs, 0); 416 if (nf_h323_error_boundary(bs, 0, 0))
417 return H323_ERROR_BOUND;
395 return H323_ERROR_NONE; 418 return H323_ERROR_NONE;
396} 419}
397 420
@@ -404,12 +427,15 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f,
404 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 427 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
405 428
406 /* 2 <= Range <= 255 */ 429 /* 2 <= Range <= 255 */
430 if (nf_h323_error_boundary(bs, 0, f->sz))
431 return H323_ERROR_BOUND;
407 len = get_bits(bs, f->sz) + f->lb; 432 len = get_bits(bs, f->sz) + f->lb;
408 433
409 BYTE_ALIGN(bs); 434 BYTE_ALIGN(bs);
410 INC_BITS(bs, (len << 2)); 435 INC_BITS(bs, (len << 2));
411 436
412 CHECK_BOUND(bs, 0); 437 if (nf_h323_error_boundary(bs, 0, 0))
438 return H323_ERROR_BOUND;
413 return H323_ERROR_NONE; 439 return H323_ERROR_NONE;
414} 440}
415 441
@@ -440,15 +466,19 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
440 break; 466 break;
441 case BYTE: /* Range == 256 */ 467 case BYTE: /* Range == 256 */
442 BYTE_ALIGN(bs); 468 BYTE_ALIGN(bs);
443 CHECK_BOUND(bs, 1); 469 if (nf_h323_error_boundary(bs, 1, 0))
470 return H323_ERROR_BOUND;
444 len = (*bs->cur++) + f->lb; 471 len = (*bs->cur++) + f->lb;
445 break; 472 break;
446 case SEMI: 473 case SEMI:
447 BYTE_ALIGN(bs); 474 BYTE_ALIGN(bs);
448 CHECK_BOUND(bs, 2); 475 if (nf_h323_error_boundary(bs, 2, 0))
476 return H323_ERROR_BOUND;
449 len = get_len(bs) + f->lb; 477 len = get_len(bs) + f->lb;
450 break; 478 break;
451 default: /* 2 <= Range <= 255 */ 479 default: /* 2 <= Range <= 255 */
480 if (nf_h323_error_boundary(bs, 0, f->sz))
481 return H323_ERROR_BOUND;
452 len = get_bits(bs, f->sz) + f->lb; 482 len = get_bits(bs, f->sz) + f->lb;
453 BYTE_ALIGN(bs); 483 BYTE_ALIGN(bs);
454 break; 484 break;
@@ -458,7 +488,8 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
458 488
459 PRINT("\n"); 489 PRINT("\n");
460 490
461 CHECK_BOUND(bs, 0); 491 if (nf_h323_error_boundary(bs, 0, 0))
492 return H323_ERROR_BOUND;
462 return H323_ERROR_NONE; 493 return H323_ERROR_NONE;
463} 494}
464 495
@@ -473,10 +504,13 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
473 switch (f->sz) { 504 switch (f->sz) {
474 case BYTE: /* Range == 256 */ 505 case BYTE: /* Range == 256 */
475 BYTE_ALIGN(bs); 506 BYTE_ALIGN(bs);
476 CHECK_BOUND(bs, 1); 507 if (nf_h323_error_boundary(bs, 1, 0))
508 return H323_ERROR_BOUND;
477 len = (*bs->cur++) + f->lb; 509 len = (*bs->cur++) + f->lb;
478 break; 510 break;
479 default: /* 2 <= Range <= 255 */ 511 default: /* 2 <= Range <= 255 */
512 if (nf_h323_error_boundary(bs, 0, f->sz))
513 return H323_ERROR_BOUND;
480 len = get_bits(bs, f->sz) + f->lb; 514 len = get_bits(bs, f->sz) + f->lb;
481 BYTE_ALIGN(bs); 515 BYTE_ALIGN(bs);
482 break; 516 break;
@@ -484,7 +518,8 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
484 518
485 bs->cur += len << 1; 519 bs->cur += len << 1;
486 520
487 CHECK_BOUND(bs, 0); 521 if (nf_h323_error_boundary(bs, 0, 0))
522 return H323_ERROR_BOUND;
488 return H323_ERROR_NONE; 523 return H323_ERROR_NONE;
489} 524}
490 525
@@ -503,9 +538,13 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
503 base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; 538 base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
504 539
505 /* Extensible? */ 540 /* Extensible? */
541 if (nf_h323_error_boundary(bs, 0, 1))
542 return H323_ERROR_BOUND;
506 ext = (f->attr & EXT) ? get_bit(bs) : 0; 543 ext = (f->attr & EXT) ? get_bit(bs) : 0;
507 544
508 /* Get fields bitmap */ 545 /* Get fields bitmap */
546 if (nf_h323_error_boundary(bs, 0, f->sz))
547 return H323_ERROR_BOUND;
509 bmp = get_bitmap(bs, f->sz); 548 bmp = get_bitmap(bs, f->sz);
510 if (base) 549 if (base)
511 *(unsigned int *)base = bmp; 550 *(unsigned int *)base = bmp;
@@ -525,9 +564,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
525 564
526 /* Decode */ 565 /* Decode */
527 if (son->attr & OPEN) { /* Open field */ 566 if (son->attr & OPEN) { /* Open field */
528 CHECK_BOUND(bs, 2); 567 if (nf_h323_error_boundary(bs, 2, 0))
568 return H323_ERROR_BOUND;
529 len = get_len(bs); 569 len = get_len(bs);
530 CHECK_BOUND(bs, len); 570 if (nf_h323_error_boundary(bs, len, 0))
571 return H323_ERROR_BOUND;
531 if (!base || !(son->attr & DECODE)) { 572 if (!base || !(son->attr & DECODE)) {
532 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, 573 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
533 " ", son->name); 574 " ", son->name);
@@ -555,8 +596,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
555 return H323_ERROR_NONE; 596 return H323_ERROR_NONE;
556 597
557 /* Get the extension bitmap */ 598 /* Get the extension bitmap */
599 if (nf_h323_error_boundary(bs, 0, 7))
600 return H323_ERROR_BOUND;
558 bmp2_len = get_bits(bs, 7) + 1; 601 bmp2_len = get_bits(bs, 7) + 1;
559 CHECK_BOUND(bs, (bmp2_len + 7) >> 3); 602 if (nf_h323_error_boundary(bs, 0, bmp2_len))
603 return H323_ERROR_BOUND;
560 bmp2 = get_bitmap(bs, bmp2_len); 604 bmp2 = get_bitmap(bs, bmp2_len);
561 bmp |= bmp2 >> f->sz; 605 bmp |= bmp2 >> f->sz;
562 if (base) 606 if (base)
@@ -567,9 +611,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
567 for (opt = 0; opt < bmp2_len; opt++, i++, son++) { 611 for (opt = 0; opt < bmp2_len; opt++, i++, son++) {
568 /* Check Range */ 612 /* Check Range */
569 if (i >= f->ub) { /* Newer Version? */ 613 if (i >= f->ub) { /* Newer Version? */
570 CHECK_BOUND(bs, 2); 614 if (nf_h323_error_boundary(bs, 2, 0))
615 return H323_ERROR_BOUND;
571 len = get_len(bs); 616 len = get_len(bs);
572 CHECK_BOUND(bs, len); 617 if (nf_h323_error_boundary(bs, len, 0))
618 return H323_ERROR_BOUND;
573 bs->cur += len; 619 bs->cur += len;
574 continue; 620 continue;
575 } 621 }
@@ -583,9 +629,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
583 if (!((0x80000000 >> opt) & bmp2)) /* Not present */ 629 if (!((0x80000000 >> opt) & bmp2)) /* Not present */
584 continue; 630 continue;
585 631
586 CHECK_BOUND(bs, 2); 632 if (nf_h323_error_boundary(bs, 2, 0))
633 return H323_ERROR_BOUND;
587 len = get_len(bs); 634 len = get_len(bs);
588 CHECK_BOUND(bs, len); 635 if (nf_h323_error_boundary(bs, len, 0))
636 return H323_ERROR_BOUND;
589 if (!base || !(son->attr & DECODE)) { 637 if (!base || !(son->attr & DECODE)) {
590 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", 638 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
591 son->name); 639 son->name);
@@ -623,22 +671,27 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
623 switch (f->sz) { 671 switch (f->sz) {
624 case BYTE: 672 case BYTE:
625 BYTE_ALIGN(bs); 673 BYTE_ALIGN(bs);
626 CHECK_BOUND(bs, 1); 674 if (nf_h323_error_boundary(bs, 1, 0))
675 return H323_ERROR_BOUND;
627 count = *bs->cur++; 676 count = *bs->cur++;
628 break; 677 break;
629 case WORD: 678 case WORD:
630 BYTE_ALIGN(bs); 679 BYTE_ALIGN(bs);
631 CHECK_BOUND(bs, 2); 680 if (nf_h323_error_boundary(bs, 2, 0))
681 return H323_ERROR_BOUND;
632 count = *bs->cur++; 682 count = *bs->cur++;
633 count <<= 8; 683 count <<= 8;
634 count += *bs->cur++; 684 count += *bs->cur++;
635 break; 685 break;
636 case SEMI: 686 case SEMI:
637 BYTE_ALIGN(bs); 687 BYTE_ALIGN(bs);
638 CHECK_BOUND(bs, 2); 688 if (nf_h323_error_boundary(bs, 2, 0))
689 return H323_ERROR_BOUND;
639 count = get_len(bs); 690 count = get_len(bs);
640 break; 691 break;
641 default: 692 default:
693 if (nf_h323_error_boundary(bs, 0, f->sz))
694 return H323_ERROR_BOUND;
642 count = get_bits(bs, f->sz); 695 count = get_bits(bs, f->sz);
643 break; 696 break;
644 } 697 }
@@ -658,8 +711,11 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
658 for (i = 0; i < count; i++) { 711 for (i = 0; i < count; i++) {
659 if (son->attr & OPEN) { 712 if (son->attr & OPEN) {
660 BYTE_ALIGN(bs); 713 BYTE_ALIGN(bs);
714 if (nf_h323_error_boundary(bs, 2, 0))
715 return H323_ERROR_BOUND;
661 len = get_len(bs); 716 len = get_len(bs);
662 CHECK_BOUND(bs, len); 717 if (nf_h323_error_boundary(bs, len, 0))
718 return H323_ERROR_BOUND;
663 if (!base || !(son->attr & DECODE)) { 719 if (!base || !(son->attr & DECODE)) {
664 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, 720 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
665 " ", son->name); 721 " ", son->name);
@@ -710,11 +766,17 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
710 base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; 766 base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
711 767
712 /* Decode the choice index number */ 768 /* Decode the choice index number */
769 if (nf_h323_error_boundary(bs, 0, 1))
770 return H323_ERROR_BOUND;
713 if ((f->attr & EXT) && get_bit(bs)) { 771 if ((f->attr & EXT) && get_bit(bs)) {
714 ext = 1; 772 ext = 1;
773 if (nf_h323_error_boundary(bs, 0, 7))
774 return H323_ERROR_BOUND;
715 type = get_bits(bs, 7) + f->lb; 775 type = get_bits(bs, 7) + f->lb;
716 } else { 776 } else {
717 ext = 0; 777 ext = 0;
778 if (nf_h323_error_boundary(bs, 0, f->sz))
779 return H323_ERROR_BOUND;
718 type = get_bits(bs, f->sz); 780 type = get_bits(bs, f->sz);
719 if (type >= f->lb) 781 if (type >= f->lb)
720 return H323_ERROR_RANGE; 782 return H323_ERROR_RANGE;
@@ -727,8 +789,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
727 /* Check Range */ 789 /* Check Range */
728 if (type >= f->ub) { /* Newer version? */ 790 if (type >= f->ub) { /* Newer version? */
729 BYTE_ALIGN(bs); 791 BYTE_ALIGN(bs);
792 if (nf_h323_error_boundary(bs, 2, 0))
793 return H323_ERROR_BOUND;
730 len = get_len(bs); 794 len = get_len(bs);
731 CHECK_BOUND(bs, len); 795 if (nf_h323_error_boundary(bs, len, 0))
796 return H323_ERROR_BOUND;
732 bs->cur += len; 797 bs->cur += len;
733 return H323_ERROR_NONE; 798 return H323_ERROR_NONE;
734 } 799 }
@@ -742,8 +807,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
742 807
743 if (ext || (son->attr & OPEN)) { 808 if (ext || (son->attr & OPEN)) {
744 BYTE_ALIGN(bs); 809 BYTE_ALIGN(bs);
810 if (nf_h323_error_boundary(bs, len, 0))
811 return H323_ERROR_BOUND;
745 len = get_len(bs); 812 len = get_len(bs);
746 CHECK_BOUND(bs, len); 813 if (nf_h323_error_boundary(bs, len, 0))
814 return H323_ERROR_BOUND;
747 if (!base || !(son->attr & DECODE)) { 815 if (!base || !(son->attr & DECODE)) {
748 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", 816 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
749 son->name); 817 son->name);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 59c08997bfdf..382d49792f42 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,7 +45,6 @@
45#include <net/netfilter/nf_conntrack_zones.h> 45#include <net/netfilter/nf_conntrack_zones.h>
46#include <net/netfilter/nf_conntrack_timestamp.h> 46#include <net/netfilter/nf_conntrack_timestamp.h>
47#include <net/netfilter/nf_conntrack_labels.h> 47#include <net/netfilter/nf_conntrack_labels.h>
48#include <net/netfilter/nf_conntrack_seqadj.h>
49#include <net/netfilter/nf_conntrack_synproxy.h> 48#include <net/netfilter/nf_conntrack_synproxy.h>
50#ifdef CONFIG_NF_NAT_NEEDED 49#ifdef CONFIG_NF_NAT_NEEDED
51#include <net/netfilter/nf_nat_core.h> 50#include <net/netfilter/nf_nat_core.h>
@@ -1566,9 +1565,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
1566static int ctnetlink_change_timeout(struct nf_conn *ct, 1565static int ctnetlink_change_timeout(struct nf_conn *ct,
1567 const struct nlattr * const cda[]) 1566 const struct nlattr * const cda[])
1568{ 1567{
1569 u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); 1568 u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
1570 1569
1571 ct->timeout = nfct_time_stamp + timeout * HZ; 1570 if (timeout > INT_MAX)
1571 timeout = INT_MAX;
1572 ct->timeout = nfct_time_stamp + (u32)timeout;
1572 1573
1573 if (test_bit(IPS_DYING_BIT, &ct->status)) 1574 if (test_bit(IPS_DYING_BIT, &ct->status))
1574 return -ETIME; 1575 return -ETIME;
@@ -1768,6 +1769,7 @@ ctnetlink_create_conntrack(struct net *net,
1768 int err = -EINVAL; 1769 int err = -EINVAL;
1769 struct nf_conntrack_helper *helper; 1770 struct nf_conntrack_helper *helper;
1770 struct nf_conn_tstamp *tstamp; 1771 struct nf_conn_tstamp *tstamp;
1772 u64 timeout;
1771 1773
1772 ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); 1774 ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
1773 if (IS_ERR(ct)) 1775 if (IS_ERR(ct))
@@ -1776,7 +1778,10 @@ ctnetlink_create_conntrack(struct net *net,
1776 if (!cda[CTA_TIMEOUT]) 1778 if (!cda[CTA_TIMEOUT])
1777 goto err1; 1779 goto err1;
1778 1780
1779 ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; 1781 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
1782 if (timeout > INT_MAX)
1783 timeout = INT_MAX;
1784 ct->timeout = (u32)timeout + nfct_time_stamp;
1780 1785
1781 rcu_read_lock(); 1786 rcu_read_lock();
1782 if (cda[CTA_HELP]) { 1787 if (cda[CTA_HELP]) {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b12fc07111d0..37ef35b861f2 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1039,6 +1039,9 @@ static int tcp_packet(struct nf_conn *ct,
1039 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && 1039 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1040 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) 1040 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1041 timeout = timeouts[TCP_CONNTRACK_UNACK]; 1041 timeout = timeouts[TCP_CONNTRACK_UNACK];
1042 else if (ct->proto.tcp.last_win == 0 &&
1043 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1044 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1042 else 1045 else
1043 timeout = timeouts[new_state]; 1046 timeout = timeouts[new_state];
1044 spin_unlock_bh(&ct->lock); 1047 spin_unlock_bh(&ct->lock);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d8327b43e4dc..10798b357481 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5847,6 +5847,12 @@ static int __net_init nf_tables_init_net(struct net *net)
5847 return 0; 5847 return 0;
5848} 5848}
5849 5849
5850static void __net_exit nf_tables_exit_net(struct net *net)
5851{
5852 WARN_ON_ONCE(!list_empty(&net->nft.af_info));
5853 WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
5854}
5855
5850int __nft_release_basechain(struct nft_ctx *ctx) 5856int __nft_release_basechain(struct nft_ctx *ctx)
5851{ 5857{
5852 struct nft_rule *rule, *nr; 5858 struct nft_rule *rule, *nr;
@@ -5917,6 +5923,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
5917 5923
5918static struct pernet_operations nf_tables_net_ops = { 5924static struct pernet_operations nf_tables_net_ops = {
5919 .init = nf_tables_init_net, 5925 .init = nf_tables_init_net,
5926 .exit = nf_tables_exit_net,
5920}; 5927};
5921 5928
5922static int __init nf_tables_module_init(void) 5929static int __init nf_tables_module_init(void)
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 41628b393673..d33ce6d5ebce 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -17,6 +17,7 @@
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/list.h> 18#include <linux/list.h>
19#include <linux/errno.h> 19#include <linux/errno.h>
20#include <linux/capability.h>
20#include <net/netlink.h> 21#include <net/netlink.h>
21#include <net/sock.h> 22#include <net/sock.h>
22 23
@@ -407,6 +408,9 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
407 struct nfnl_cthelper *nlcth; 408 struct nfnl_cthelper *nlcth;
408 int ret = 0; 409 int ret = 0;
409 410
411 if (!capable(CAP_NET_ADMIN))
412 return -EPERM;
413
410 if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) 414 if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
411 return -EINVAL; 415 return -EINVAL;
412 416
@@ -611,6 +615,9 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
611 struct nfnl_cthelper *nlcth; 615 struct nfnl_cthelper *nlcth;
612 bool tuple_set = false; 616 bool tuple_set = false;
613 617
618 if (!capable(CAP_NET_ADMIN))
619 return -EPERM;
620
614 if (nlh->nlmsg_flags & NLM_F_DUMP) { 621 if (nlh->nlmsg_flags & NLM_F_DUMP) {
615 struct netlink_dump_control c = { 622 struct netlink_dump_control c = {
616 .dump = nfnl_cthelper_dump_table, 623 .dump = nfnl_cthelper_dump_table,
@@ -678,6 +685,9 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
678 struct nfnl_cthelper *nlcth, *n; 685 struct nfnl_cthelper *nlcth, *n;
679 int j = 0, ret; 686 int j = 0, ret;
680 687
688 if (!capable(CAP_NET_ADMIN))
689 return -EPERM;
690
681 if (tb[NFCTH_NAME]) 691 if (tb[NFCTH_NAME])
682 helper_name = nla_data(tb[NFCTH_NAME]); 692 helper_name = nla_data(tb[NFCTH_NAME]);
683 693
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e5afab86381c..e955bec0acc6 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1093,10 +1093,15 @@ static int __net_init nfnl_log_net_init(struct net *net)
1093 1093
1094static void __net_exit nfnl_log_net_exit(struct net *net) 1094static void __net_exit nfnl_log_net_exit(struct net *net)
1095{ 1095{
1096 struct nfnl_log_net *log = nfnl_log_pernet(net);
1097 unsigned int i;
1098
1096#ifdef CONFIG_PROC_FS 1099#ifdef CONFIG_PROC_FS
1097 remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); 1100 remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
1098#endif 1101#endif
1099 nf_log_unset(net, &nfulnl_logger); 1102 nf_log_unset(net, &nfulnl_logger);
1103 for (i = 0; i < INSTANCE_BUCKETS; i++)
1104 WARN_ON_ONCE(!hlist_empty(&log->instance_table[i]));
1100} 1105}
1101 1106
1102static struct pernet_operations nfnl_log_net_ops = { 1107static struct pernet_operations nfnl_log_net_ops = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index a16356cacec3..c09b36755ed7 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1512,10 +1512,15 @@ static int __net_init nfnl_queue_net_init(struct net *net)
1512 1512
1513static void __net_exit nfnl_queue_net_exit(struct net *net) 1513static void __net_exit nfnl_queue_net_exit(struct net *net)
1514{ 1514{
1515 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
1516 unsigned int i;
1517
1515 nf_unregister_queue_handler(net); 1518 nf_unregister_queue_handler(net);
1516#ifdef CONFIG_PROC_FS 1519#ifdef CONFIG_PROC_FS
1517 remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); 1520 remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
1518#endif 1521#endif
1522 for (i = 0; i < INSTANCE_BUCKETS; i++)
1523 WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
1519} 1524}
1520 1525
1521static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list) 1526static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list)
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a0a93d987a3b..47ec1046ad11 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -214,6 +214,8 @@ static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
214 [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, 214 [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
215 [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, 215 [NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
216 [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 }, 216 [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 },
217 [NFTA_EXTHDR_OP] = { .type = NLA_U32 },
218 [NFTA_EXTHDR_SREG] = { .type = NLA_U32 },
217}; 219};
218 220
219static int nft_exthdr_init(const struct nft_ctx *ctx, 221static int nft_exthdr_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index a77dd514297c..55802e97f906 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1729,8 +1729,17 @@ static int __net_init xt_net_init(struct net *net)
1729 return 0; 1729 return 0;
1730} 1730}
1731 1731
1732static void __net_exit xt_net_exit(struct net *net)
1733{
1734 int i;
1735
1736 for (i = 0; i < NFPROTO_NUMPROTO; i++)
1737 WARN_ON_ONCE(!list_empty(&net->xt.tables[i]));
1738}
1739
1732static struct pernet_operations xt_net_ops = { 1740static struct pernet_operations xt_net_ops = {
1733 .init = xt_net_init, 1741 .init = xt_net_init,
1742 .exit = xt_net_exit,
1734}; 1743};
1735 1744
1736static int __init xt_init(void) 1745static int __init xt_init(void)
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 041da0d9c06f..1f7fbd3c7e5a 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -27,6 +27,9 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
27{ 27{
28 struct sock_fprog_kern program; 28 struct sock_fprog_kern program;
29 29
30 if (len > XT_BPF_MAX_NUM_INSTR)
31 return -EINVAL;
32
30 program.len = len; 33 program.len = len;
31 program.filter = insns; 34 program.filter = insns;
32 35
@@ -55,6 +58,9 @@ static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
55 mm_segment_t oldfs = get_fs(); 58 mm_segment_t oldfs = get_fs();
56 int retval, fd; 59 int retval, fd;
57 60
61 if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
62 return -EINVAL;
63
58 set_fs(KERNEL_DS); 64 set_fs(KERNEL_DS);
59 fd = bpf_obj_get_user(path, 0); 65 fd = bpf_obj_get_user(path, 0);
60 set_fs(oldfs); 66 set_fs(oldfs);
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 36e14b1f061d..a34f314a8c23 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -19,6 +19,7 @@
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/kernel.h> 20#include <linux/kernel.h>
21 21
22#include <linux/capability.h>
22#include <linux/if.h> 23#include <linux/if.h>
23#include <linux/inetdevice.h> 24#include <linux/inetdevice.h>
24#include <linux/ip.h> 25#include <linux/ip.h>
@@ -70,6 +71,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
70 struct xt_osf_finger *kf = NULL, *sf; 71 struct xt_osf_finger *kf = NULL, *sf;
71 int err = 0; 72 int err = 0;
72 73
74 if (!capable(CAP_NET_ADMIN))
75 return -EPERM;
76
73 if (!osf_attrs[OSF_ATTR_FINGER]) 77 if (!osf_attrs[OSF_ATTR_FINGER])
74 return -EINVAL; 78 return -EINVAL;
75 79
@@ -115,6 +119,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
115 struct xt_osf_finger *sf; 119 struct xt_osf_finger *sf;
116 int err = -ENOENT; 120 int err = -ENOENT;
117 121
122 if (!capable(CAP_NET_ADMIN))
123 return -EPERM;
124
118 if (!osf_attrs[OSF_ATTR_FINGER]) 125 if (!osf_attrs[OSF_ATTR_FINGER])
119 return -EINVAL; 126 return -EINVAL;
120 127
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b9e0ee4e22f5..79cc1bf36e4a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -253,6 +253,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
253 struct sock *sk = skb->sk; 253 struct sock *sk = skb->sk;
254 int ret = -ENOMEM; 254 int ret = -ENOMEM;
255 255
256 if (!net_eq(dev_net(dev), sock_net(sk)))
257 return 0;
258
256 dev_hold(dev); 259 dev_hold(dev);
257 260
258 if (is_vmalloc_addr(skb->head)) 261 if (is_vmalloc_addr(skb->head))
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dbe2379329c5..f039064ce922 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
579 return -EINVAL; 579 return -EINVAL;
580 580
581 skb_reset_network_header(skb); 581 skb_reset_network_header(skb);
582 key->eth.type = skb->protocol;
582 } else { 583 } else {
583 eth = eth_hdr(skb); 584 eth = eth_hdr(skb);
584 ether_addr_copy(key->eth.src, eth->h_source); 585 ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
592 if (unlikely(parse_vlan(skb, key))) 593 if (unlikely(parse_vlan(skb, key)))
593 return -ENOMEM; 594 return -ENOMEM;
594 595
595 skb->protocol = parse_ethertype(skb); 596 key->eth.type = parse_ethertype(skb);
596 if (unlikely(skb->protocol == htons(0))) 597 if (unlikely(key->eth.type == htons(0)))
597 return -ENOMEM; 598 return -ENOMEM;
598 599
600 /* Multiple tagged packets need to retain TPID to satisfy
601 * skb_vlan_pop(), which will later shift the ethertype into
602 * skb->protocol.
603 */
604 if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
605 skb->protocol = key->eth.cvlan.tpid;
606 else
607 skb->protocol = key->eth.type;
608
599 skb_reset_network_header(skb); 609 skb_reset_network_header(skb);
600 __skb_push(skb, skb->data - skb_mac_header(skb)); 610 __skb_push(skb, skb->data - skb_mac_header(skb));
601 } 611 }
602 skb_reset_mac_len(skb); 612 skb_reset_mac_len(skb);
603 key->eth.type = skb->protocol;
604 613
605 /* Network layer. */ 614 /* Network layer. */
606 if (key->eth.type == htons(ETH_P_IP)) { 615 if (key->eth.type == htons(ETH_P_IP)) {
diff --git a/net/rds/send.c b/net/rds/send.c
index b52cdc8ae428..f72466c63f0c 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1009,6 +1009,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes)
1009 continue; 1009 continue;
1010 1010
1011 if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) { 1011 if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
1012 if (cmsg->cmsg_len <
1013 CMSG_LEN(sizeof(struct rds_rdma_args)))
1014 return -EINVAL;
1012 args = CMSG_DATA(cmsg); 1015 args = CMSG_DATA(cmsg);
1013 *rdma_bytes += args->remote_vec.bytes; 1016 *rdma_bytes += args->remote_vec.bytes;
1014 } 1017 }
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 1e3f10e5da99..6445184b2759 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -22,7 +22,6 @@
22#include <net/pkt_sched.h> 22#include <net/pkt_sched.h>
23#include <uapi/linux/tc_act/tc_ife.h> 23#include <uapi/linux/tc_act/tc_ife.h>
24#include <net/tc_act/tc_ife.h> 24#include <net/tc_act/tc_ife.h>
25#include <linux/rtnetlink.h>
26 25
27static int skbmark_encode(struct sk_buff *skb, void *skbdata, 26static int skbmark_encode(struct sk_buff *skb, void *skbdata,
28 struct tcf_meta_info *e) 27 struct tcf_meta_info *e)
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 2ea1f26c9e96..7221437ca3a6 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -22,7 +22,6 @@
22#include <net/pkt_sched.h> 22#include <net/pkt_sched.h>
23#include <uapi/linux/tc_act/tc_ife.h> 23#include <uapi/linux/tc_act/tc_ife.h>
24#include <net/tc_act/tc_ife.h> 24#include <net/tc_act/tc_ife.h>
25#include <linux/rtnetlink.h>
26 25
27static int skbtcindex_encode(struct sk_buff *skb, void *skbdata, 26static int skbtcindex_encode(struct sk_buff *skb, void *skbdata,
28 struct tcf_meta_info *e) 27 struct tcf_meta_info *e)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ddcf04b4ab43..b9d63d2246e6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -23,7 +23,6 @@
23#include <linux/skbuff.h> 23#include <linux/skbuff.h>
24#include <linux/init.h> 24#include <linux/init.h>
25#include <linux/kmod.h> 25#include <linux/kmod.h>
26#include <linux/err.h>
27#include <linux/slab.h> 26#include <linux/slab.h>
28#include <net/net_namespace.h> 27#include <net/net_namespace.h>
29#include <net/sock.h> 28#include <net/sock.h>
@@ -352,6 +351,8 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
352{ 351{
353 struct tcf_chain *chain; 352 struct tcf_chain *chain;
354 353
354 if (!block)
355 return;
355 /* Hold a refcnt for all chains, except 0, so that they don't disappear 356 /* Hold a refcnt for all chains, except 0, so that they don't disappear
356 * while we are iterating. 357 * while we are iterating.
357 */ 358 */
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6fe798c2df1a..8d78e7f4ecc3 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
42 struct list_head link; 42 struct list_head link;
43 struct tcf_result res; 43 struct tcf_result res;
44 bool exts_integrated; 44 bool exts_integrated;
45 bool offloaded;
46 u32 gen_flags; 45 u32 gen_flags;
47 struct tcf_exts exts; 46 struct tcf_exts exts;
48 u32 handle; 47 u32 handle;
@@ -148,33 +147,37 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
148} 147}
149 148
150static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, 149static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
151 enum tc_clsbpf_command cmd) 150 struct cls_bpf_prog *oldprog)
152{ 151{
153 bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
154 struct tcf_block *block = tp->chain->block; 152 struct tcf_block *block = tp->chain->block;
155 bool skip_sw = tc_skip_sw(prog->gen_flags);
156 struct tc_cls_bpf_offload cls_bpf = {}; 153 struct tc_cls_bpf_offload cls_bpf = {};
154 struct cls_bpf_prog *obj;
155 bool skip_sw;
157 int err; 156 int err;
158 157
158 skip_sw = prog && tc_skip_sw(prog->gen_flags);
159 obj = prog ?: oldprog;
160
159 tc_cls_common_offload_init(&cls_bpf.common, tp); 161 tc_cls_common_offload_init(&cls_bpf.common, tp);
160 cls_bpf.command = cmd; 162 cls_bpf.command = TC_CLSBPF_OFFLOAD;
161 cls_bpf.exts = &prog->exts; 163 cls_bpf.exts = &obj->exts;
162 cls_bpf.prog = prog->filter; 164 cls_bpf.prog = prog ? prog->filter : NULL;
163 cls_bpf.name = prog->bpf_name; 165 cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
164 cls_bpf.exts_integrated = prog->exts_integrated; 166 cls_bpf.name = obj->bpf_name;
165 cls_bpf.gen_flags = prog->gen_flags; 167 cls_bpf.exts_integrated = obj->exts_integrated;
168 cls_bpf.gen_flags = obj->gen_flags;
166 169
167 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); 170 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
168 if (addorrep) { 171 if (prog) {
169 if (err < 0) { 172 if (err < 0) {
170 cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); 173 cls_bpf_offload_cmd(tp, oldprog, prog);
171 return err; 174 return err;
172 } else if (err > 0) { 175 } else if (err > 0) {
173 prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; 176 prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
174 } 177 }
175 } 178 }
176 179
177 if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) 180 if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
178 return -EINVAL; 181 return -EINVAL;
179 182
180 return 0; 183 return 0;
@@ -183,38 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
183static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, 186static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
184 struct cls_bpf_prog *oldprog) 187 struct cls_bpf_prog *oldprog)
185{ 188{
186 struct cls_bpf_prog *obj = prog; 189 if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
187 enum tc_clsbpf_command cmd; 190 return -EINVAL;
188 bool skip_sw;
189 int ret;
190
191 skip_sw = tc_skip_sw(prog->gen_flags) ||
192 (oldprog && tc_skip_sw(oldprog->gen_flags));
193
194 if (oldprog && oldprog->offloaded) {
195 if (!tc_skip_hw(prog->gen_flags)) {
196 cmd = TC_CLSBPF_REPLACE;
197 } else if (!tc_skip_sw(prog->gen_flags)) {
198 obj = oldprog;
199 cmd = TC_CLSBPF_DESTROY;
200 } else {
201 return -EINVAL;
202 }
203 } else {
204 if (tc_skip_hw(prog->gen_flags))
205 return skip_sw ? -EINVAL : 0;
206 cmd = TC_CLSBPF_ADD;
207 }
208
209 ret = cls_bpf_offload_cmd(tp, obj, cmd);
210 if (ret)
211 return ret;
212 191
213 obj->offloaded = true; 192 if (prog && tc_skip_hw(prog->gen_flags))
214 if (oldprog) 193 prog = NULL;
215 oldprog->offloaded = false; 194 if (oldprog && tc_skip_hw(oldprog->gen_flags))
195 oldprog = NULL;
196 if (!prog && !oldprog)
197 return 0;
216 198
217 return 0; 199 return cls_bpf_offload_cmd(tp, prog, oldprog);
218} 200}
219 201
220static void cls_bpf_stop_offload(struct tcf_proto *tp, 202static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -222,25 +204,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
222{ 204{
223 int err; 205 int err;
224 206
225 if (!prog->offloaded) 207 err = cls_bpf_offload_cmd(tp, NULL, prog);
226 return; 208 if (err)
227
228 err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
229 if (err) {
230 pr_err("Stopping hardware offload failed: %d\n", err); 209 pr_err("Stopping hardware offload failed: %d\n", err);
231 return;
232 }
233
234 prog->offloaded = false;
235} 210}
236 211
237static void cls_bpf_offload_update_stats(struct tcf_proto *tp, 212static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
238 struct cls_bpf_prog *prog) 213 struct cls_bpf_prog *prog)
239{ 214{
240 if (!prog->offloaded) 215 struct tcf_block *block = tp->chain->block;
241 return; 216 struct tc_cls_bpf_offload cls_bpf = {};
217
218 tc_cls_common_offload_init(&cls_bpf.common, tp);
219 cls_bpf.command = TC_CLSBPF_STATS;
220 cls_bpf.exts = &prog->exts;
221 cls_bpf.prog = prog->filter;
222 cls_bpf.name = prog->bpf_name;
223 cls_bpf.exts_integrated = prog->exts_integrated;
224 cls_bpf.gen_flags = prog->gen_flags;
242 225
243 cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS); 226 tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
244} 227}
245 228
246static int cls_bpf_init(struct tcf_proto *tp) 229static int cls_bpf_init(struct tcf_proto *tp)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ac152b4f4247..507859cdd1cb 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -45,7 +45,6 @@
45#include <net/netlink.h> 45#include <net/netlink.h>
46#include <net/act_api.h> 46#include <net/act_api.h>
47#include <net/pkt_cls.h> 47#include <net/pkt_cls.h>
48#include <linux/netdevice.h>
49#include <linux/idr.h> 48#include <linux/idr.h>
50 49
51struct tc_u_knode { 50struct tc_u_knode {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b6c4f536876b..0f1eab99ff4e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -795,6 +795,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
795 tcm->tcm_info = refcount_read(&q->refcnt); 795 tcm->tcm_info = refcount_read(&q->refcnt);
796 if (nla_put_string(skb, TCA_KIND, q->ops->id)) 796 if (nla_put_string(skb, TCA_KIND, q->ops->id))
797 goto nla_put_failure; 797 goto nla_put_failure;
798 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
799 goto nla_put_failure;
798 if (q->ops->dump && q->ops->dump(q, skb) < 0) 800 if (q->ops->dump && q->ops->dump(q, skb) < 0)
799 goto nla_put_failure; 801 goto nla_put_failure;
800 qlen = q->q.qlen; 802 qlen = q->q.qlen;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cd1b200acae7..661c7144b53a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1040,6 +1040,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
1040 1040
1041 if (!tp_head) { 1041 if (!tp_head) {
1042 RCU_INIT_POINTER(*miniqp->p_miniq, NULL); 1042 RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
1043 /* Wait for flying RCU callback before it is freed. */
1044 rcu_barrier_bh();
1043 return; 1045 return;
1044 } 1046 }
1045 1047
@@ -1055,7 +1057,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
1055 rcu_assign_pointer(*miniqp->p_miniq, miniq); 1057 rcu_assign_pointer(*miniqp->p_miniq, miniq);
1056 1058
1057 if (miniq_old) 1059 if (miniq_old)
1058 /* This is counterpart of the rcu barrier above. We need to 1060 /* This is counterpart of the rcu barriers above. We need to
1059 * block potential new user of miniq_old until all readers 1061 * block potential new user of miniq_old until all readers
1060 * are not seeing it. 1062 * are not seeing it.
1061 */ 1063 */
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 5ecc38f35d47..fc1286f499c1 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -68,6 +68,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
68 struct net_device *dev = qdisc_dev(sch); 68 struct net_device *dev = qdisc_dev(sch);
69 int err; 69 int err;
70 70
71 net_inc_ingress_queue();
72
71 mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); 73 mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
72 74
73 q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; 75 q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
@@ -78,7 +80,6 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
78 if (err) 80 if (err)
79 return err; 81 return err;
80 82
81 net_inc_ingress_queue();
82 sch->flags |= TCQ_F_CPUSTATS; 83 sch->flags |= TCQ_F_CPUSTATS;
83 84
84 return 0; 85 return 0;
@@ -172,6 +173,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
172 struct net_device *dev = qdisc_dev(sch); 173 struct net_device *dev = qdisc_dev(sch);
173 int err; 174 int err;
174 175
176 net_inc_ingress_queue();
177 net_inc_egress_queue();
178
175 mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress); 179 mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
176 180
177 q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; 181 q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
@@ -190,18 +194,11 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
190 194
191 err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); 195 err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
192 if (err) 196 if (err)
193 goto err_egress_block_get; 197 return err;
194
195 net_inc_ingress_queue();
196 net_inc_egress_queue();
197 198
198 sch->flags |= TCQ_F_CPUSTATS; 199 sch->flags |= TCQ_F_CPUSTATS;
199 200
200 return 0; 201 return 0;
201
202err_egress_block_get:
203 tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
204 return err;
205} 202}
206 203
207static void clsact_destroy(struct Qdisc *sch) 204static void clsact_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 9d874e60e032..f0747eb87dc4 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -157,6 +157,7 @@ static int red_offload(struct Qdisc *sch, bool enable)
157 .handle = sch->handle, 157 .handle = sch->handle,
158 .parent = sch->parent, 158 .parent = sch->parent,
159 }; 159 };
160 int err;
160 161
161 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 162 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
162 return -EOPNOTSUPP; 163 return -EOPNOTSUPP;
@@ -171,7 +172,14 @@ static int red_offload(struct Qdisc *sch, bool enable)
171 opt.command = TC_RED_DESTROY; 172 opt.command = TC_RED_DESTROY;
172 } 173 }
173 174
174 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt); 175 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
176
177 if (!err && enable)
178 sch->flags |= TCQ_F_OFFLOADED;
179 else
180 sch->flags &= ~TCQ_F_OFFLOADED;
181
182 return err;
175} 183}
176 184
177static void red_destroy(struct Qdisc *sch) 185static void red_destroy(struct Qdisc *sch)
@@ -274,7 +282,7 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
274 return red_change(sch, opt); 282 return red_change(sch, opt);
275} 283}
276 284
277static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt) 285static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
278{ 286{
279 struct net_device *dev = qdisc_dev(sch); 287 struct net_device *dev = qdisc_dev(sch);
280 struct tc_red_qopt_offload hw_stats = { 288 struct tc_red_qopt_offload hw_stats = {
@@ -286,21 +294,12 @@ static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
286 .stats.qstats = &sch->qstats, 294 .stats.qstats = &sch->qstats,
287 }, 295 },
288 }; 296 };
289 int err;
290 297
291 opt->flags &= ~TC_RED_OFFLOADED; 298 if (!(sch->flags & TCQ_F_OFFLOADED))
292 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
293 return 0;
294
295 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
296 &hw_stats);
297 if (err == -EOPNOTSUPP)
298 return 0; 299 return 0;
299 300
300 if (!err) 301 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
301 opt->flags |= TC_RED_OFFLOADED; 302 &hw_stats);
302
303 return err;
304} 303}
305 304
306static int red_dump(struct Qdisc *sch, struct sk_buff *skb) 305static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -319,7 +318,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
319 int err; 318 int err;
320 319
321 sch->qstats.backlog = q->qdisc->qstats.backlog; 320 sch->qstats.backlog = q->qdisc->qstats.backlog;
322 err = red_dump_offload(sch, &opt); 321 err = red_dump_offload_stats(sch, &opt);
323 if (err) 322 if (err)
324 goto nla_put_failure; 323 goto nla_put_failure;
325 324
@@ -347,7 +346,7 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
347 .marked = q->stats.prob_mark + q->stats.forced_mark, 346 .marked = q->stats.prob_mark + q->stats.forced_mark,
348 }; 347 };
349 348
350 if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) { 349 if (sch->flags & TCQ_F_OFFLOADED) {
351 struct red_stats hw_stats = {0}; 350 struct red_stats hw_stats = {0};
352 struct tc_red_qopt_offload hw_stats_request = { 351 struct tc_red_qopt_offload hw_stats_request = {
353 .command = TC_RED_XSTATS, 352 .command = TC_RED_XSTATS,
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 3f619fdcbf0a..291c97b07058 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid)
78 case SCTP_CID_AUTH: 78 case SCTP_CID_AUTH:
79 return "AUTH"; 79 return "AUTH";
80 80
81 case SCTP_CID_RECONF:
82 return "RECONF";
83
81 default: 84 default:
82 break; 85 break;
83 } 86 }
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index eb17a911aa29..b4fb6e4886d2 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3891,13 +3891,17 @@ static int sctp_setsockopt_reset_streams(struct sock *sk,
3891 struct sctp_association *asoc; 3891 struct sctp_association *asoc;
3892 int retval = -EINVAL; 3892 int retval = -EINVAL;
3893 3893
3894 if (optlen < sizeof(struct sctp_reset_streams)) 3894 if (optlen < sizeof(*params))
3895 return -EINVAL; 3895 return -EINVAL;
3896 3896
3897 params = memdup_user(optval, optlen); 3897 params = memdup_user(optval, optlen);
3898 if (IS_ERR(params)) 3898 if (IS_ERR(params))
3899 return PTR_ERR(params); 3899 return PTR_ERR(params);
3900 3900
3901 if (params->srs_number_streams * sizeof(__u16) >
3902 optlen - sizeof(*params))
3903 goto out;
3904
3901 asoc = sctp_id2assoc(sk, params->srs_assoc_id); 3905 asoc = sctp_id2assoc(sk, params->srs_assoc_id);
3902 if (!asoc) 3906 if (!asoc)
3903 goto out; 3907 goto out;
@@ -4494,7 +4498,7 @@ static int sctp_init_sock(struct sock *sk)
4494 SCTP_DBG_OBJCNT_INC(sock); 4498 SCTP_DBG_OBJCNT_INC(sock);
4495 4499
4496 local_bh_disable(); 4500 local_bh_disable();
4497 percpu_counter_inc(&sctp_sockets_allocated); 4501 sk_sockets_allocated_inc(sk);
4498 sock_prot_inuse_add(net, sk->sk_prot, 1); 4502 sock_prot_inuse_add(net, sk->sk_prot, 1);
4499 4503
4500 /* Nothing can fail after this block, otherwise 4504 /* Nothing can fail after this block, otherwise
@@ -4538,7 +4542,7 @@ static void sctp_destroy_sock(struct sock *sk)
4538 } 4542 }
4539 sctp_endpoint_free(sp->ep); 4543 sctp_endpoint_free(sp->ep);
4540 local_bh_disable(); 4544 local_bh_disable();
4541 percpu_counter_dec(&sctp_sockets_allocated); 4545 sk_sockets_allocated_dec(sk);
4542 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 4546 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
4543 local_bh_enable(); 4547 local_bh_enable();
4544} 4548}
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index a71be33f3afe..e36ec5dd64c6 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
1084void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, 1084void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
1085 gfp_t gfp) 1085 gfp_t gfp)
1086{ 1086{
1087 struct sctp_association *asoc; 1087 struct sctp_association *asoc = ulpq->asoc;
1088 __u16 needed, freed; 1088 __u32 freed = 0;
1089 1089 __u16 needed;
1090 asoc = ulpq->asoc;
1091 1090
1092 if (chunk) { 1091 needed = ntohs(chunk->chunk_hdr->length) -
1093 needed = ntohs(chunk->chunk_hdr->length); 1092 sizeof(struct sctp_data_chunk);
1094 needed -= sizeof(struct sctp_data_chunk);
1095 } else
1096 needed = SCTP_DEFAULT_MAXWINDOW;
1097
1098 freed = 0;
1099 1093
1100 if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) { 1094 if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
1101 freed = sctp_ulpq_renege_order(ulpq, needed); 1095 freed = sctp_ulpq_renege_order(ulpq, needed);
1102 if (freed < needed) { 1096 if (freed < needed)
1103 freed += sctp_ulpq_renege_frags(ulpq, needed - freed); 1097 freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
1104 }
1105 } 1098 }
1106 /* If able to free enough room, accept this chunk. */ 1099 /* If able to free enough room, accept this chunk. */
1107 if (chunk && (freed >= needed)) { 1100 if (freed >= needed) {
1108 int retval; 1101 int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
1109 retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
1110 /* 1102 /*
1111 * Enter partial delivery if chunk has not been 1103 * Enter partial delivery if chunk has not been
1112 * delivered; otherwise, drain the reassembly queue. 1104 * delivered; otherwise, drain the reassembly queue.
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index c5fda15ba319..1fdab5c4eda8 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp)
401 * allows a thread in BH context to safely check if the process 401 * allows a thread in BH context to safely check if the process
402 * lock is held. In this case, if the lock is held, queue work. 402 * lock is held. In this case, if the lock is held, queue work.
403 */ 403 */
404 if (sock_owned_by_user(strp->sk)) { 404 if (sock_owned_by_user_nocheck(strp->sk)) {
405 queue_work(strp_wq, &strp->work); 405 queue_work(strp_wq, &strp->work);
406 return; 406 return;
407 } 407 }
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index c4778cae58ef..444380f968f1 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
231 goto out_free_groups; 231 goto out_free_groups;
232 creds->cr_group_info->gid[i] = kgid; 232 creds->cr_group_info->gid[i] = kgid;
233 } 233 }
234 groups_sort(creds->cr_group_info);
234 235
235 return 0; 236 return 0;
236out_free_groups: 237out_free_groups:
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 5dd4e6c9fef2..26531193fce4 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail *cd,
481 goto out; 481 goto out;
482 rsci.cred.cr_group_info->gid[i] = kgid; 482 rsci.cred.cr_group_info->gid[i] = kgid;
483 } 483 }
484 groups_sort(rsci.cred.cr_group_info);
484 485
485 /* mech name */ 486 /* mech name */
486 len = qword_get(&mesg, buf, mlen); 487 len = qword_get(&mesg, buf, mlen);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 740b67d5a733..af7f28fb8102 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_detail *cd,
520 ug.gi->gid[i] = kgid; 520 ug.gi->gid[i] = kgid;
521 } 521 }
522 522
523 groups_sort(ug.gi);
523 ugp = unix_gid_lookup(cd, uid); 524 ugp = unix_gid_lookup(cd, uid);
524 if (ugp) { 525 if (ugp) {
525 struct cache_head *ch; 526 struct cache_head *ch;
@@ -819,6 +820,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
819 kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv)); 820 kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
820 cred->cr_group_info->gid[i] = kgid; 821 cred->cr_group_info->gid[i] = kgid;
821 } 822 }
823 groups_sort(cred->cr_group_info);
822 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { 824 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
823 *authp = rpc_autherr_badverf; 825 *authp = rpc_autherr_badverf;
824 return SVC_DENIED; 826 return SVC_DENIED;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 333b9d697ae5..33b74fd84051 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1001,6 +1001,7 @@ void xprt_transmit(struct rpc_task *task)
1001{ 1001{
1002 struct rpc_rqst *req = task->tk_rqstp; 1002 struct rpc_rqst *req = task->tk_rqstp;
1003 struct rpc_xprt *xprt = req->rq_xprt; 1003 struct rpc_xprt *xprt = req->rq_xprt;
1004 unsigned int connect_cookie;
1004 int status, numreqs; 1005 int status, numreqs;
1005 1006
1006 dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); 1007 dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
@@ -1024,6 +1025,7 @@ void xprt_transmit(struct rpc_task *task)
1024 } else if (!req->rq_bytes_sent) 1025 } else if (!req->rq_bytes_sent)
1025 return; 1026 return;
1026 1027
1028 connect_cookie = xprt->connect_cookie;
1027 req->rq_xtime = ktime_get(); 1029 req->rq_xtime = ktime_get();
1028 status = xprt->ops->send_request(task); 1030 status = xprt->ops->send_request(task);
1029 trace_xprt_transmit(xprt, req->rq_xid, status); 1031 trace_xprt_transmit(xprt, req->rq_xid, status);
@@ -1047,20 +1049,28 @@ void xprt_transmit(struct rpc_task *task)
1047 xprt->stat.bklog_u += xprt->backlog.qlen; 1049 xprt->stat.bklog_u += xprt->backlog.qlen;
1048 xprt->stat.sending_u += xprt->sending.qlen; 1050 xprt->stat.sending_u += xprt->sending.qlen;
1049 xprt->stat.pending_u += xprt->pending.qlen; 1051 xprt->stat.pending_u += xprt->pending.qlen;
1052 spin_unlock_bh(&xprt->transport_lock);
1050 1053
1051 /* Don't race with disconnect */ 1054 req->rq_connect_cookie = connect_cookie;
1052 if (!xprt_connected(xprt)) 1055 if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) {
1053 task->tk_status = -ENOTCONN;
1054 else {
1055 /* 1056 /*
1056 * Sleep on the pending queue since 1057 * Sleep on the pending queue if we're expecting a reply.
1057 * we're expecting a reply. 1058 * The spinlock ensures atomicity between the test of
1059 * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
1058 */ 1060 */
1059 if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) 1061 spin_lock(&xprt->recv_lock);
1062 if (!req->rq_reply_bytes_recvd) {
1060 rpc_sleep_on(&xprt->pending, task, xprt_timer); 1063 rpc_sleep_on(&xprt->pending, task, xprt_timer);
1061 req->rq_connect_cookie = xprt->connect_cookie; 1064 /*
1065 * Send an extra queue wakeup call if the
1066 * connection was dropped in case the call to
1067 * rpc_sleep_on() raced.
1068 */
1069 if (!xprt_connected(xprt))
1070 xprt_wake_pending_tasks(xprt, -ENOTCONN);
1071 }
1072 spin_unlock(&xprt->recv_lock);
1062 } 1073 }
1063 spin_unlock_bh(&xprt->transport_lock);
1064} 1074}
1065 1075
1066static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) 1076static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index ed34dc0f144c..a3f2ab283aeb 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1408,11 +1408,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1408 dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", 1408 dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
1409 __func__, rep, req, be32_to_cpu(rep->rr_xid)); 1409 __func__, rep, req, be32_to_cpu(rep->rr_xid));
1410 1410
1411 if (list_empty(&req->rl_registered) && 1411 queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
1412 !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
1413 rpcrdma_complete_rqst(rep);
1414 else
1415 queue_work(rpcrdma_receive_wq, &rep->rr_work);
1416 return; 1412 return;
1417 1413
1418out_badstatus: 1414out_badstatus:
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 646c24494ea7..6ee1ad8978f3 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -52,6 +52,7 @@
52#include <linux/slab.h> 52#include <linux/slab.h>
53#include <linux/seq_file.h> 53#include <linux/seq_file.h>
54#include <linux/sunrpc/addr.h> 54#include <linux/sunrpc/addr.h>
55#include <linux/smp.h>
55 56
56#include "xprt_rdma.h" 57#include "xprt_rdma.h"
57 58
@@ -656,6 +657,7 @@ xprt_rdma_allocate(struct rpc_task *task)
656 task->tk_pid, __func__, rqst->rq_callsize, 657 task->tk_pid, __func__, rqst->rq_callsize,
657 rqst->rq_rcvsize, req); 658 rqst->rq_rcvsize, req);
658 659
660 req->rl_cpu = smp_processor_id();
659 req->rl_connect_cookie = 0; /* our reserved value */ 661 req->rl_connect_cookie = 0; /* our reserved value */
660 rpcrdma_set_xprtdata(rqst, req); 662 rpcrdma_set_xprtdata(rqst, req);
661 rqst->rq_buffer = req->rl_sendbuf->rg_base; 663 rqst->rq_buffer = req->rl_sendbuf->rg_base;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 710b3f77db82..8607c029c0dd 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -83,7 +83,7 @@ rpcrdma_alloc_wq(void)
83 struct workqueue_struct *recv_wq; 83 struct workqueue_struct *recv_wq;
84 84
85 recv_wq = alloc_workqueue("xprtrdma_receive", 85 recv_wq = alloc_workqueue("xprtrdma_receive",
86 WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI, 86 WQ_MEM_RECLAIM | WQ_HIGHPRI,
87 0); 87 0);
88 if (!recv_wq) 88 if (!recv_wq)
89 return -ENOMEM; 89 return -ENOMEM;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 51686d9eac5f..1342f743f1c4 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -342,6 +342,7 @@ enum {
342struct rpcrdma_buffer; 342struct rpcrdma_buffer;
343struct rpcrdma_req { 343struct rpcrdma_req {
344 struct list_head rl_list; 344 struct list_head rl_list;
345 int rl_cpu;
345 unsigned int rl_connect_cookie; 346 unsigned int rl_connect_cookie;
346 struct rpcrdma_buffer *rl_buffer; 347 struct rpcrdma_buffer *rl_buffer;
347 struct rpcrdma_rep *rl_reply; 348 struct rpcrdma_rep *rl_reply;
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 47ec121574ce..c8001471da6c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -324,6 +324,7 @@ restart:
324 if (res) { 324 if (res) {
325 pr_warn("Bearer <%s> rejected, enable failure (%d)\n", 325 pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
326 name, -res); 326 name, -res);
327 kfree(b);
327 return -EINVAL; 328 return -EINVAL;
328 } 329 }
329 330
@@ -347,8 +348,10 @@ restart:
347 if (skb) 348 if (skb)
348 tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); 349 tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
349 350
350 if (tipc_mon_create(net, bearer_id)) 351 if (tipc_mon_create(net, bearer_id)) {
352 bearer_disable(net, b);
351 return -ENOMEM; 353 return -ENOMEM;
354 }
352 355
353 pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", 356 pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
354 name, 357 name,
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 95fec2c057d6..8e12ab55346b 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -351,8 +351,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
351 if (m->window >= ADV_IDLE) 351 if (m->window >= ADV_IDLE)
352 return; 352 return;
353 353
354 if (!list_empty(&m->congested)) 354 list_del_init(&m->congested);
355 return;
356 355
357 /* Sort member into congested members' list */ 356 /* Sort member into congested members' list */
358 list_for_each_entry_safe(_m, tmp, &grp->congested, congested) { 357 list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
@@ -369,18 +368,20 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
369 u16 prev = grp->bc_snd_nxt - 1; 368 u16 prev = grp->bc_snd_nxt - 1;
370 struct tipc_member *m; 369 struct tipc_member *m;
371 struct rb_node *n; 370 struct rb_node *n;
371 u16 ackers = 0;
372 372
373 for (n = rb_first(&grp->members); n; n = rb_next(n)) { 373 for (n = rb_first(&grp->members); n; n = rb_next(n)) {
374 m = container_of(n, struct tipc_member, tree_node); 374 m = container_of(n, struct tipc_member, tree_node);
375 if (tipc_group_is_enabled(m)) { 375 if (tipc_group_is_enabled(m)) {
376 tipc_group_update_member(m, len); 376 tipc_group_update_member(m, len);
377 m->bc_acked = prev; 377 m->bc_acked = prev;
378 ackers++;
378 } 379 }
379 } 380 }
380 381
381 /* Mark number of acknowledges to expect, if any */ 382 /* Mark number of acknowledges to expect, if any */
382 if (ack) 383 if (ack)
383 grp->bc_ackers = grp->member_cnt; 384 grp->bc_ackers = ackers;
384 grp->bc_snd_nxt++; 385 grp->bc_snd_nxt++;
385} 386}
386 387
@@ -648,6 +649,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
648 } else if (mtyp == GRP_REMIT_MSG) { 649 } else if (mtyp == GRP_REMIT_MSG) {
649 msg_set_grp_remitted(hdr, m->window); 650 msg_set_grp_remitted(hdr, m->window);
650 } 651 }
652 msg_set_dest_droppable(hdr, true);
651 __skb_queue_tail(xmitq, skb); 653 __skb_queue_tail(xmitq, skb);
652} 654}
653 655
@@ -689,15 +691,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
689 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); 691 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
690 __skb_queue_tail(inputq, m->event_msg); 692 __skb_queue_tail(inputq, m->event_msg);
691 } 693 }
692 if (m->window < ADV_IDLE) 694 list_del_init(&m->congested);
693 tipc_group_update_member(m, 0); 695 tipc_group_update_member(m, 0);
694 else
695 list_del_init(&m->congested);
696 return; 696 return;
697 case GRP_LEAVE_MSG: 697 case GRP_LEAVE_MSG:
698 if (!m) 698 if (!m)
699 return; 699 return;
700 m->bc_syncpt = msg_grp_bc_syncpt(hdr); 700 m->bc_syncpt = msg_grp_bc_syncpt(hdr);
701 list_del_init(&m->list);
702 list_del_init(&m->congested);
703 *usr_wakeup = true;
701 704
702 /* Wait until WITHDRAW event is received */ 705 /* Wait until WITHDRAW event is received */
703 if (m->state != MBR_LEAVING) { 706 if (m->state != MBR_LEAVING) {
@@ -709,8 +712,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
709 ehdr = buf_msg(m->event_msg); 712 ehdr = buf_msg(m->event_msg);
710 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); 713 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
711 __skb_queue_tail(inputq, m->event_msg); 714 __skb_queue_tail(inputq, m->event_msg);
712 *usr_wakeup = true;
713 list_del_init(&m->congested);
714 return; 715 return;
715 case GRP_ADV_MSG: 716 case GRP_ADV_MSG:
716 if (!m) 717 if (!m)
@@ -849,19 +850,29 @@ void tipc_group_member_evt(struct tipc_group *grp,
849 *usr_wakeup = true; 850 *usr_wakeup = true;
850 m->usr_pending = false; 851 m->usr_pending = false;
851 node_up = tipc_node_is_up(net, node); 852 node_up = tipc_node_is_up(net, node);
852 853 m->event_msg = NULL;
853 /* Hold back event if more messages might be expected */ 854
854 if (m->state != MBR_LEAVING && node_up) { 855 if (node_up) {
855 m->event_msg = skb; 856 /* Hold back event if a LEAVE msg should be expected */
856 tipc_group_decr_active(grp, m); 857 if (m->state != MBR_LEAVING) {
857 m->state = MBR_LEAVING; 858 m->event_msg = skb;
858 } else { 859 tipc_group_decr_active(grp, m);
859 if (node_up) 860 m->state = MBR_LEAVING;
861 } else {
860 msg_set_grp_bc_seqno(hdr, m->bc_syncpt); 862 msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
861 else 863 __skb_queue_tail(inputq, skb);
864 }
865 } else {
866 if (m->state != MBR_LEAVING) {
867 tipc_group_decr_active(grp, m);
868 m->state = MBR_LEAVING;
862 msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); 869 msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
870 } else {
871 msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
872 }
863 __skb_queue_tail(inputq, skb); 873 __skb_queue_tail(inputq, skb);
864 } 874 }
875 list_del_init(&m->list);
865 list_del_init(&m->congested); 876 list_del_init(&m->congested);
866 } 877 }
867 *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); 878 *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 8e884ed06d4b..32dc33a94bc7 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -642,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id)
642{ 642{
643 struct tipc_net *tn = tipc_net(net); 643 struct tipc_net *tn = tipc_net(net);
644 struct tipc_monitor *mon = tipc_monitor(net, bearer_id); 644 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
645 struct tipc_peer *self = get_self(net, bearer_id); 645 struct tipc_peer *self;
646 struct tipc_peer *peer, *tmp; 646 struct tipc_peer *peer, *tmp;
647 647
648 if (!mon)
649 return;
650
651 self = get_self(net, bearer_id);
648 write_lock_bh(&mon->lock); 652 write_lock_bh(&mon->lock);
649 tn->monitors[bearer_id] = NULL; 653 tn->monitors[bearer_id] = NULL;
650 list_for_each_entry_safe(peer, tmp, &self->list, list) { 654 list_for_each_entry_safe(peer, tmp, &self->list, list) {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 5d18c0caa92b..3b4084480377 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
727 727
728 switch (sk->sk_state) { 728 switch (sk->sk_state) {
729 case TIPC_ESTABLISHED: 729 case TIPC_ESTABLISHED:
730 case TIPC_CONNECTING:
730 if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) 731 if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
731 revents |= POLLOUT; 732 revents |= POLLOUT;
732 /* fall thru' */ 733 /* fall thru' */
733 case TIPC_LISTEN: 734 case TIPC_LISTEN:
734 case TIPC_CONNECTING:
735 if (!skb_queue_empty(&sk->sk_receive_queue)) 735 if (!skb_queue_empty(&sk->sk_receive_queue))
736 revents |= POLLIN | POLLRDNORM; 736 revents |= POLLIN | POLLRDNORM;
737 break; 737 break;
@@ -1140,7 +1140,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
1140 __skb_dequeue(arrvq); 1140 __skb_dequeue(arrvq);
1141 __skb_queue_tail(inputq, skb); 1141 __skb_queue_tail(inputq, skb);
1142 } 1142 }
1143 refcount_dec(&skb->users); 1143 kfree_skb(skb);
1144 spin_unlock_bh(&inputq->lock); 1144 spin_unlock_bh(&inputq->lock);
1145 continue; 1145 continue;
1146 } 1146 }
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 278d979c211a..1d84f91bbfb0 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -23,19 +23,36 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
23cfg80211-y += extra-certs.o 23cfg80211-y += extra-certs.o
24endif 24endif
25 25
26$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) 26$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
27 @$(kecho) " GEN $@" 27 @$(kecho) " GEN $@"
28 @echo '#include "reg.h"' > $@ 28 @(echo '#include "reg.h"'; \
29 @echo 'const u8 shipped_regdb_certs[] = {' >> $@ 29 echo 'const u8 shipped_regdb_certs[] = {'; \
30 @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done 30 cat $^ ; \
31 @echo '};' >> $@ 31 echo '};'; \
32 @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@ 32 echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
33 ) > $@
33 34
34$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ 35$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
35 $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) 36 $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
36 @$(kecho) " GEN $@" 37 @$(kecho) " GEN $@"
37 @echo '#include "reg.h"' > $@ 38 @(set -e; \
38 @echo 'const u8 extra_regdb_certs[] = {' >> $@ 39 allf=""; \
39 @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done 40 for f in $^ ; do \
40 @echo '};' >> $@ 41 # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
41 @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@ 42 thisf=$$(od -An -v -tx1 < $$f | \
43 sed -e 's/ /\n/g' | \
44 sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
45 sed -e 's/^/0x/;s/$$/,/'); \
46 # file should not be empty - maybe command substitution failed? \
47 test ! -z "$$thisf";\
48 allf=$$allf$$thisf;\
49 done; \
50 ( \
51 echo '#include "reg.h"'; \
52 echo 'const u8 extra_regdb_certs[] = {'; \
53 echo "$$allf"; \
54 echo '};'; \
55 echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
56 ) > $@)
57
58clean-files += shipped-certs.c extra-certs.c
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex
new file mode 100644
index 000000000000..14ea66643ffa
--- /dev/null
+++ b/net/wireless/certs/sforshee.hex
@@ -0,0 +1,86 @@
1/* Seth Forshee's regdb certificate */
20x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
30x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae,
40xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a,
50x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b,
60x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f,
70x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73,
80x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30,
90x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30,
100x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a,
110x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39,
120x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35,
130x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06,
140x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66,
150x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82,
160x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86,
170x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05,
180x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82,
190x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5,
200x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2,
210x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac,
220x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c,
230x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38,
240x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d,
250x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20,
260x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b,
270x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57,
280x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b,
290x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51,
300x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a,
310xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18,
320x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98,
330xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1,
340x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28,
350x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71,
360x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a,
370xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85,
380xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30,
390x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7,
400x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65,
410x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3,
420x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18,
430x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36,
440x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1,
450x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96,
460x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c,
470x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11,
480x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7,
490xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6,
500xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0,
510x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02,
520x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09,
530x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
540x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
550x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf,
560x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93,
570x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7,
580x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9,
590x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3,
600x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec,
610xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0,
620x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3,
630x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4,
640x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32,
650xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74,
660x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22,
670x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86,
680x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c,
690xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06,
700xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1,
710x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58,
720xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4,
730xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72,
740x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79,
750xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a,
760xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f,
770x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47,
780xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a,
790x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28,
800xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2,
810xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87,
820x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d,
830x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc,
840x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16,
850x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f,
860x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14,
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
deleted file mode 100644
index c6f8f9d6b988..000000000000
--- a/net/wireless/certs/sforshee.x509
+++ /dev/null
Binary files differ
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b1ac23ca20c8..213d0c498c97 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2610 case NL80211_IFTYPE_AP: 2610 case NL80211_IFTYPE_AP:
2611 if (wdev->ssid_len && 2611 if (wdev->ssid_len &&
2612 nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) 2612 nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
2613 goto nla_put_failure; 2613 goto nla_put_failure_locked;
2614 break; 2614 break;
2615 case NL80211_IFTYPE_STATION: 2615 case NL80211_IFTYPE_STATION:
2616 case NL80211_IFTYPE_P2P_CLIENT: 2616 case NL80211_IFTYPE_P2P_CLIENT:
@@ -2623,7 +2623,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2623 if (!ssid_ie) 2623 if (!ssid_ie)
2624 break; 2624 break;
2625 if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2)) 2625 if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
2626 goto nla_put_failure; 2626 goto nla_put_failure_locked;
2627 break; 2627 break;
2628 } 2628 }
2629 default: 2629 default:
@@ -2635,6 +2635,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2635 genlmsg_end(msg, hdr); 2635 genlmsg_end(msg, hdr);
2636 return 0; 2636 return 0;
2637 2637
2638 nla_put_failure_locked:
2639 wdev_unlock(wdev);
2638 nla_put_failure: 2640 nla_put_failure:
2639 genlmsg_cancel(msg, hdr); 2641 genlmsg_cancel(msg, hdr);
2640 return -EMSGSIZE; 2642 return -EMSGSIZE;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 347ab31574d5..3f6f6f8c9fa5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -8,15 +8,29 @@
8 * 8 *
9 */ 9 */
10 10
11#include <linux/bottom_half.h>
12#include <linux/interrupt.h>
11#include <linux/slab.h> 13#include <linux/slab.h>
12#include <linux/module.h> 14#include <linux/module.h>
13#include <linux/netdevice.h> 15#include <linux/netdevice.h>
16#include <linux/percpu.h>
14#include <net/dst.h> 17#include <net/dst.h>
15#include <net/ip.h> 18#include <net/ip.h>
16#include <net/xfrm.h> 19#include <net/xfrm.h>
17#include <net/ip_tunnels.h> 20#include <net/ip_tunnels.h>
18#include <net/ip6_tunnel.h> 21#include <net/ip6_tunnel.h>
19 22
23struct xfrm_trans_tasklet {
24 struct tasklet_struct tasklet;
25 struct sk_buff_head queue;
26};
27
28struct xfrm_trans_cb {
29 int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
30};
31
32#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
33
20static struct kmem_cache *secpath_cachep __read_mostly; 34static struct kmem_cache *secpath_cachep __read_mostly;
21 35
22static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); 36static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
@@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
25static struct gro_cells gro_cells; 39static struct gro_cells gro_cells;
26static struct net_device xfrm_napi_dev; 40static struct net_device xfrm_napi_dev;
27 41
42static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
43
28int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo) 44int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
29{ 45{
30 int err = 0; 46 int err = 0;
@@ -207,7 +223,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
207 xfrm_address_t *daddr; 223 xfrm_address_t *daddr;
208 struct xfrm_mode *inner_mode; 224 struct xfrm_mode *inner_mode;
209 u32 mark = skb->mark; 225 u32 mark = skb->mark;
210 unsigned int family; 226 unsigned int family = AF_UNSPEC;
211 int decaps = 0; 227 int decaps = 0;
212 int async = 0; 228 int async = 0;
213 bool xfrm_gro = false; 229 bool xfrm_gro = false;
@@ -216,6 +232,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
216 232
217 if (encap_type < 0) { 233 if (encap_type < 0) {
218 x = xfrm_input_state(skb); 234 x = xfrm_input_state(skb);
235
236 if (unlikely(x->km.state != XFRM_STATE_VALID)) {
237 if (x->km.state == XFRM_STATE_ACQ)
238 XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
239 else
240 XFRM_INC_STATS(net,
241 LINUX_MIB_XFRMINSTATEINVALID);
242 goto drop;
243 }
244
219 family = x->outer_mode->afinfo->family; 245 family = x->outer_mode->afinfo->family;
220 246
221 /* An encap_type of -1 indicates async resumption. */ 247 /* An encap_type of -1 indicates async resumption. */
@@ -467,9 +493,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
467} 493}
468EXPORT_SYMBOL(xfrm_input_resume); 494EXPORT_SYMBOL(xfrm_input_resume);
469 495
496static void xfrm_trans_reinject(unsigned long data)
497{
498 struct xfrm_trans_tasklet *trans = (void *)data;
499 struct sk_buff_head queue;
500 struct sk_buff *skb;
501
502 __skb_queue_head_init(&queue);
503 skb_queue_splice_init(&trans->queue, &queue);
504
505 while ((skb = __skb_dequeue(&queue)))
506 XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
507}
508
509int xfrm_trans_queue(struct sk_buff *skb,
510 int (*finish)(struct net *, struct sock *,
511 struct sk_buff *))
512{
513 struct xfrm_trans_tasklet *trans;
514
515 trans = this_cpu_ptr(&xfrm_trans_tasklet);
516
517 if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
518 return -ENOBUFS;
519
520 XFRM_TRANS_SKB_CB(skb)->finish = finish;
521 skb_queue_tail(&trans->queue, skb);
522 tasklet_schedule(&trans->tasklet);
523 return 0;
524}
525EXPORT_SYMBOL(xfrm_trans_queue);
526
470void __init xfrm_input_init(void) 527void __init xfrm_input_init(void)
471{ 528{
472 int err; 529 int err;
530 int i;
473 531
474 init_dummy_netdev(&xfrm_napi_dev); 532 init_dummy_netdev(&xfrm_napi_dev);
475 err = gro_cells_init(&gro_cells, &xfrm_napi_dev); 533 err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
@@ -480,4 +538,13 @@ void __init xfrm_input_init(void)
480 sizeof(struct sec_path), 538 sizeof(struct sec_path),
481 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 539 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
482 NULL); 540 NULL);
541
542 for_each_possible_cpu(i) {
543 struct xfrm_trans_tasklet *trans;
544
545 trans = &per_cpu(xfrm_trans_tasklet, i);
546 __skb_queue_head_init(&trans->queue);
547 tasklet_init(&trans->tasklet, xfrm_trans_reinject,
548 (unsigned long)trans);
549 }
483} 550}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9542975eb2f9..70aa5cb0c659 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1168,9 +1168,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
1168 again: 1168 again:
1169 pol = rcu_dereference(sk->sk_policy[dir]); 1169 pol = rcu_dereference(sk->sk_policy[dir]);
1170 if (pol != NULL) { 1170 if (pol != NULL) {
1171 bool match = xfrm_selector_match(&pol->selector, fl, family); 1171 bool match;
1172 int err = 0; 1172 int err = 0;
1173 1173
1174 if (pol->family != family) {
1175 pol = NULL;
1176 goto out;
1177 }
1178
1179 match = xfrm_selector_match(&pol->selector, fl, family);
1174 if (match) { 1180 if (match) {
1175 if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { 1181 if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
1176 pol = NULL; 1182 pol = NULL;
@@ -1833,6 +1839,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1833 sizeof(struct xfrm_policy *) * num_pols) == 0 && 1839 sizeof(struct xfrm_policy *) * num_pols) == 0 &&
1834 xfrm_xdst_can_reuse(xdst, xfrm, err)) { 1840 xfrm_xdst_can_reuse(xdst, xfrm, err)) {
1835 dst_hold(&xdst->u.dst); 1841 dst_hold(&xdst->u.dst);
1842 xfrm_pols_put(pols, num_pols);
1836 while (err > 0) 1843 while (err > 0)
1837 xfrm_state_put(xfrm[--err]); 1844 xfrm_state_put(xfrm[--err]);
1838 return xdst; 1845 return xdst;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 065d89606888..500b3391f474 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1343,6 +1343,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
1343 1343
1344 if (orig->aead) { 1344 if (orig->aead) {
1345 x->aead = xfrm_algo_aead_clone(orig->aead); 1345 x->aead = xfrm_algo_aead_clone(orig->aead);
1346 x->geniv = orig->geniv;
1346 if (!x->aead) 1347 if (!x->aead)
1347 goto error; 1348 goto error;
1348 } 1349 }
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 983b0233767b..bdb48e5dba04 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1419,11 +1419,14 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
1419 1419
1420static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) 1420static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
1421{ 1421{
1422 u16 prev_family;
1422 int i; 1423 int i;
1423 1424
1424 if (nr > XFRM_MAX_DEPTH) 1425 if (nr > XFRM_MAX_DEPTH)
1425 return -EINVAL; 1426 return -EINVAL;
1426 1427
1428 prev_family = family;
1429
1427 for (i = 0; i < nr; i++) { 1430 for (i = 0; i < nr; i++) {
1428 /* We never validated the ut->family value, so many 1431 /* We never validated the ut->family value, so many
1429 * applications simply leave it at zero. The check was 1432 * applications simply leave it at zero. The check was
@@ -1435,6 +1438,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
1435 if (!ut[i].family) 1438 if (!ut[i].family)
1436 ut[i].family = family; 1439 ut[i].family = family;
1437 1440
1441 if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&
1442 (ut[i].family != prev_family))
1443 return -EINVAL;
1444
1445 prev_family = ut[i].family;
1446
1438 switch (ut[i].family) { 1447 switch (ut[i].family) {
1439 case AF_INET: 1448 case AF_INET:
1440 break; 1449 break;
@@ -1445,6 +1454,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
1445 default: 1454 default:
1446 return -EINVAL; 1455 return -EINVAL;
1447 } 1456 }
1457
1458 switch (ut[i].id.proto) {
1459 case IPPROTO_AH:
1460 case IPPROTO_ESP:
1461 case IPPROTO_COMP:
1462#if IS_ENABLED(CONFIG_IPV6)
1463 case IPPROTO_ROUTING:
1464 case IPPROTO_DSTOPTS:
1465#endif
1466 case IPSEC_PROTO_ANY:
1467 break;
1468 default:
1469 return -EINVAL;
1470 }
1471
1448 } 1472 }
1449 1473
1450 return 0; 1474 return 0;
@@ -2470,7 +2494,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
2470 [XFRMA_PROTO] = { .type = NLA_U8 }, 2494 [XFRMA_PROTO] = { .type = NLA_U8 },
2471 [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, 2495 [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
2472 [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, 2496 [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) },
2473 [XFRMA_OUTPUT_MARK] = { .len = NLA_U32 }, 2497 [XFRMA_OUTPUT_MARK] = { .type = NLA_U32 },
2474}; 2498};
2475 2499
2476static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { 2500static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 040aa79e1d9d..31031f10fe56 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -6233,28 +6233,6 @@ sub process {
6233 } 6233 }
6234 } 6234 }
6235 6235
6236# whine about ACCESS_ONCE
6237 if ($^V && $^V ge 5.10.0 &&
6238 $line =~ /\bACCESS_ONCE\s*$balanced_parens\s*(=(?!=))?\s*($FuncArg)?/) {
6239 my $par = $1;
6240 my $eq = $2;
6241 my $fun = $3;
6242 $par =~ s/^\(\s*(.*)\s*\)$/$1/;
6243 if (defined($eq)) {
6244 if (WARN("PREFER_WRITE_ONCE",
6245 "Prefer WRITE_ONCE(<FOO>, <BAR>) over ACCESS_ONCE(<FOO>) = <BAR>\n" . $herecurr) &&
6246 $fix) {
6247 $fixed[$fixlinenr] =~ s/\bACCESS_ONCE\s*\(\s*\Q$par\E\s*\)\s*$eq\s*\Q$fun\E/WRITE_ONCE($par, $fun)/;
6248 }
6249 } else {
6250 if (WARN("PREFER_READ_ONCE",
6251 "Prefer READ_ONCE(<FOO>) over ACCESS_ONCE(<FOO>)\n" . $herecurr) &&
6252 $fix) {
6253 $fixed[$fixlinenr] =~ s/\bACCESS_ONCE\s*\(\s*\Q$par\E\s*\)/READ_ONCE($par)/;
6254 }
6255 }
6256 }
6257
6258# check for mutex_trylock_recursive usage 6236# check for mutex_trylock_recursive usage
6259 if ($line =~ /mutex_trylock_recursive/) { 6237 if ($line =~ /mutex_trylock_recursive/) {
6260 ERROR("LOCKING", 6238 ERROR("LOCKING",
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 39e07d8574dd..7721d5b2b0c0 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -44,10 +44,10 @@
44set -o errexit 44set -o errexit
45set -o nounset 45set -o nounset
46 46
47READELF="${CROSS_COMPILE}readelf" 47READELF="${CROSS_COMPILE:-}readelf"
48ADDR2LINE="${CROSS_COMPILE}addr2line" 48ADDR2LINE="${CROSS_COMPILE:-}addr2line"
49SIZE="${CROSS_COMPILE}size" 49SIZE="${CROSS_COMPILE:-}size"
50NM="${CROSS_COMPILE}nm" 50NM="${CROSS_COMPILE:-}nm"
51 51
52command -v awk >/dev/null 2>&1 || die "awk isn't installed" 52command -v awk >/dev/null 2>&1 || die "awk isn't installed"
53command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed" 53command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed"
diff --git a/security/Kconfig b/security/Kconfig
index 3b4effd8bbc2..5ea8914817f6 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -54,6 +54,16 @@ config SECURITY_NETWORK
54 implement socket and networking access controls. 54 implement socket and networking access controls.
55 If you are unsure how to answer this question, answer N. 55 If you are unsure how to answer this question, answer N.
56 56
57config PAGE_TABLE_ISOLATION
58 bool "Remove the kernel mapping in user mode"
59 depends on X86_64 && !UML
60 help
61 This feature reduces the number of hardware side channels by
62 ensuring that the majority of kernel addresses are not mapped
63 into userspace.
64
65 See Documentation/x86/pagetable-isolation.txt for more details.
66
57config SECURITY_INFINIBAND 67config SECURITY_INFINIBAND
58 bool "Infiniband Security Hooks" 68 bool "Infiniband Security Hooks"
59 depends on SECURITY && INFINIBAND 69 depends on SECURITY && INFINIBAND
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index b3b353d72527..f055ca10bbc1 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream,
579 return 0; 579 return 0;
580} 580}
581 581
582int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) 582static int __snd_rawmidi_info_select(struct snd_card *card,
583 struct snd_rawmidi_info *info)
583{ 584{
584 struct snd_rawmidi *rmidi; 585 struct snd_rawmidi *rmidi;
585 struct snd_rawmidi_str *pstr; 586 struct snd_rawmidi_str *pstr;
586 struct snd_rawmidi_substream *substream; 587 struct snd_rawmidi_substream *substream;
587 588
588 mutex_lock(&register_mutex);
589 rmidi = snd_rawmidi_search(card, info->device); 589 rmidi = snd_rawmidi_search(card, info->device);
590 mutex_unlock(&register_mutex);
591 if (!rmidi) 590 if (!rmidi)
592 return -ENXIO; 591 return -ENXIO;
593 if (info->stream < 0 || info->stream > 1) 592 if (info->stream < 0 || info->stream > 1)
@@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info
603 } 602 }
604 return -ENXIO; 603 return -ENXIO;
605} 604}
605
606int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
607{
608 int ret;
609
610 mutex_lock(&register_mutex);
611 ret = __snd_rawmidi_info_select(card, info);
612 mutex_unlock(&register_mutex);
613 return ret;
614}
606EXPORT_SYMBOL(snd_rawmidi_info_select); 615EXPORT_SYMBOL(snd_rawmidi_info_select);
607 616
608static int snd_rawmidi_info_select_user(struct snd_card *card, 617static int snd_rawmidi_info_select_user(struct snd_card *card,
diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c
index 038a180d3f81..cbe818eda336 100644
--- a/sound/hda/hdac_i915.c
+++ b/sound/hda/hdac_i915.c
@@ -325,7 +325,7 @@ static int hdac_component_master_match(struct device *dev, void *data)
325 */ 325 */
326int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops) 326int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops)
327{ 327{
328 if (WARN_ON(!hdac_acomp)) 328 if (!hdac_acomp)
329 return -ENODEV; 329 return -ENODEV;
330 330
331 hdac_acomp->audio_ops = aops; 331 hdac_acomp->audio_ops = aops;
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index a81aacf684b2..37e1cf8218ff 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -271,6 +271,8 @@ enum {
271 CXT_FIXUP_HP_SPECTRE, 271 CXT_FIXUP_HP_SPECTRE,
272 CXT_FIXUP_HP_GATE_MIC, 272 CXT_FIXUP_HP_GATE_MIC,
273 CXT_FIXUP_MUTE_LED_GPIO, 273 CXT_FIXUP_MUTE_LED_GPIO,
274 CXT_FIXUP_HEADSET_MIC,
275 CXT_FIXUP_HP_MIC_NO_PRESENCE,
274}; 276};
275 277
276/* for hda_fixup_thinkpad_acpi() */ 278/* for hda_fixup_thinkpad_acpi() */
@@ -350,6 +352,18 @@ static void cxt_fixup_headphone_mic(struct hda_codec *codec,
350 } 352 }
351} 353}
352 354
355static void cxt_fixup_headset_mic(struct hda_codec *codec,
356 const struct hda_fixup *fix, int action)
357{
358 struct conexant_spec *spec = codec->spec;
359
360 switch (action) {
361 case HDA_FIXUP_ACT_PRE_PROBE:
362 spec->parse_flags |= HDA_PINCFG_HEADSET_MIC;
363 break;
364 }
365}
366
353/* OPLC XO 1.5 fixup */ 367/* OPLC XO 1.5 fixup */
354 368
355/* OLPC XO-1.5 supports DC input mode (e.g. for use with analog sensors) 369/* OLPC XO-1.5 supports DC input mode (e.g. for use with analog sensors)
@@ -880,6 +894,19 @@ static const struct hda_fixup cxt_fixups[] = {
880 .type = HDA_FIXUP_FUNC, 894 .type = HDA_FIXUP_FUNC,
881 .v.func = cxt_fixup_mute_led_gpio, 895 .v.func = cxt_fixup_mute_led_gpio,
882 }, 896 },
897 [CXT_FIXUP_HEADSET_MIC] = {
898 .type = HDA_FIXUP_FUNC,
899 .v.func = cxt_fixup_headset_mic,
900 },
901 [CXT_FIXUP_HP_MIC_NO_PRESENCE] = {
902 .type = HDA_FIXUP_PINS,
903 .v.pins = (const struct hda_pintbl[]) {
904 { 0x1a, 0x02a1113c },
905 { }
906 },
907 .chained = true,
908 .chain_id = CXT_FIXUP_HEADSET_MIC,
909 },
883}; 910};
884 911
885static const struct snd_pci_quirk cxt5045_fixups[] = { 912static const struct snd_pci_quirk cxt5045_fixups[] = {
@@ -934,6 +961,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
934 SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), 961 SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
935 SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO), 962 SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO),
936 SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO), 963 SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO),
964 SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
965 SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
937 SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), 966 SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
938 SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), 967 SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO),
939 SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), 968 SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index c19c81d230bd..b4f1b6e88305 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -55,10 +55,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
55#define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b) 55#define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b)
56#define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \ 56#define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \
57 ((codec)->core.vendor_id == 0x80862800)) 57 ((codec)->core.vendor_id == 0x80862800))
58#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
58#define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \ 59#define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
59 || is_skylake(codec) || is_broxton(codec) \ 60 || is_skylake(codec) || is_broxton(codec) \
60 || is_kabylake(codec)) || is_geminilake(codec) 61 || is_kabylake(codec)) || is_geminilake(codec) \
61 62 || is_cannonlake(codec)
62#define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882) 63#define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
63#define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883) 64#define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
64#define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec)) 65#define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
@@ -3841,6 +3842,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI", patch_i915_hsw_hdmi),
3841HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI", patch_i915_hsw_hdmi), 3842HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI", patch_i915_hsw_hdmi),
3842HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI", patch_i915_hsw_hdmi), 3843HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI", patch_i915_hsw_hdmi),
3843HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI", patch_i915_hsw_hdmi), 3844HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI", patch_i915_hsw_hdmi),
3845HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi),
3844HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi), 3846HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi),
3845HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi), 3847HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi),
3846HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi), 3848HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 4b21f71d685c..8fd2d9c62c96 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -324,8 +324,12 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
324 case 0x10ec0292: 324 case 0x10ec0292:
325 alc_update_coef_idx(codec, 0x4, 1<<15, 0); 325 alc_update_coef_idx(codec, 0x4, 1<<15, 0);
326 break; 326 break;
327 case 0x10ec0215:
328 case 0x10ec0225: 327 case 0x10ec0225:
328 case 0x10ec0295:
329 case 0x10ec0299:
330 alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000);
331 /* fallthrough */
332 case 0x10ec0215:
329 case 0x10ec0233: 333 case 0x10ec0233:
330 case 0x10ec0236: 334 case 0x10ec0236:
331 case 0x10ec0255: 335 case 0x10ec0255:
@@ -336,10 +340,8 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
336 case 0x10ec0286: 340 case 0x10ec0286:
337 case 0x10ec0288: 341 case 0x10ec0288:
338 case 0x10ec0285: 342 case 0x10ec0285:
339 case 0x10ec0295:
340 case 0x10ec0298: 343 case 0x10ec0298:
341 case 0x10ec0289: 344 case 0x10ec0289:
342 case 0x10ec0299:
343 alc_update_coef_idx(codec, 0x10, 1<<9, 0); 345 alc_update_coef_idx(codec, 0x10, 1<<9, 0);
344 break; 346 break;
345 case 0x10ec0275: 347 case 0x10ec0275:
@@ -5185,6 +5187,22 @@ static void alc233_alc662_fixup_lenovo_dual_codecs(struct hda_codec *codec,
5185 } 5187 }
5186} 5188}
5187 5189
5190/* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */
5191static void alc274_fixup_bind_dacs(struct hda_codec *codec,
5192 const struct hda_fixup *fix, int action)
5193{
5194 struct alc_spec *spec = codec->spec;
5195 static hda_nid_t preferred_pairs[] = {
5196 0x21, 0x03, 0x1b, 0x03, 0x16, 0x02,
5197 0
5198 };
5199
5200 if (action != HDA_FIXUP_ACT_PRE_PROBE)
5201 return;
5202
5203 spec->gen.preferred_dacs = preferred_pairs;
5204}
5205
5188/* for hda_fixup_thinkpad_acpi() */ 5206/* for hda_fixup_thinkpad_acpi() */
5189#include "thinkpad_helper.c" 5207#include "thinkpad_helper.c"
5190 5208
@@ -5302,6 +5320,8 @@ enum {
5302 ALC233_FIXUP_LENOVO_MULTI_CODECS, 5320 ALC233_FIXUP_LENOVO_MULTI_CODECS,
5303 ALC294_FIXUP_LENOVO_MIC_LOCATION, 5321 ALC294_FIXUP_LENOVO_MIC_LOCATION,
5304 ALC700_FIXUP_INTEL_REFERENCE, 5322 ALC700_FIXUP_INTEL_REFERENCE,
5323 ALC274_FIXUP_DELL_BIND_DACS,
5324 ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
5305}; 5325};
5306 5326
5307static const struct hda_fixup alc269_fixups[] = { 5327static const struct hda_fixup alc269_fixups[] = {
@@ -6112,6 +6132,21 @@ static const struct hda_fixup alc269_fixups[] = {
6112 {} 6132 {}
6113 } 6133 }
6114 }, 6134 },
6135 [ALC274_FIXUP_DELL_BIND_DACS] = {
6136 .type = HDA_FIXUP_FUNC,
6137 .v.func = alc274_fixup_bind_dacs,
6138 .chained = true,
6139 .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
6140 },
6141 [ALC274_FIXUP_DELL_AIO_LINEOUT_VERB] = {
6142 .type = HDA_FIXUP_PINS,
6143 .v.pins = (const struct hda_pintbl[]) {
6144 { 0x1b, 0x0401102f },
6145 { }
6146 },
6147 .chained = true,
6148 .chain_id = ALC274_FIXUP_DELL_BIND_DACS
6149 },
6115}; 6150};
6116 6151
6117static const struct snd_pci_quirk alc269_fixup_tbl[] = { 6152static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6295,6 +6330,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
6295 SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), 6330 SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
6296 SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), 6331 SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
6297 SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), 6332 SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
6333 SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
6298 SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), 6334 SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
6299 SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), 6335 SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
6300 SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), 6336 SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
@@ -6553,6 +6589,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
6553 {0x1b, 0x01011020}, 6589 {0x1b, 0x01011020},
6554 {0x21, 0x02211010}), 6590 {0x21, 0x02211010}),
6555 SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, 6591 SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
6592 {0x12, 0x90a60130},
6593 {0x14, 0x90170110},
6594 {0x1b, 0x01011020},
6595 {0x21, 0x0221101f}),
6596 SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
6556 {0x12, 0x90a60160}, 6597 {0x12, 0x90a60160},
6557 {0x14, 0x90170120}, 6598 {0x14, 0x90170120},
6558 {0x21, 0x02211030}), 6599 {0x21, 0x02211030}),
@@ -6578,7 +6619,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
6578 {0x14, 0x90170110}, 6619 {0x14, 0x90170110},
6579 {0x1b, 0x90a70130}, 6620 {0x1b, 0x90a70130},
6580 {0x21, 0x03211020}), 6621 {0x21, 0x03211020}),
6581 SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, 6622 SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
6582 {0x12, 0xb7a60130}, 6623 {0x12, 0xb7a60130},
6583 {0x13, 0xb8a61140}, 6624 {0x13, 0xb8a61140},
6584 {0x16, 0x90170110}, 6625 {0x16, 0x90170110},
diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c
index 9f521a55d610..b5e41df6bb3a 100644
--- a/sound/soc/amd/acp-pcm-dma.c
+++ b/sound/soc/amd/acp-pcm-dma.c
@@ -1051,6 +1051,11 @@ static int acp_audio_probe(struct platform_device *pdev)
1051 struct resource *res; 1051 struct resource *res;
1052 const u32 *pdata = pdev->dev.platform_data; 1052 const u32 *pdata = pdev->dev.platform_data;
1053 1053
1054 if (!pdata) {
1055 dev_err(&pdev->dev, "Missing platform data\n");
1056 return -ENODEV;
1057 }
1058
1054 audio_drv_data = devm_kzalloc(&pdev->dev, sizeof(struct audio_drv_data), 1059 audio_drv_data = devm_kzalloc(&pdev->dev, sizeof(struct audio_drv_data),
1055 GFP_KERNEL); 1060 GFP_KERNEL);
1056 if (audio_drv_data == NULL) 1061 if (audio_drv_data == NULL)
@@ -1058,6 +1063,8 @@ static int acp_audio_probe(struct platform_device *pdev)
1058 1063
1059 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 1064 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1060 audio_drv_data->acp_mmio = devm_ioremap_resource(&pdev->dev, res); 1065 audio_drv_data->acp_mmio = devm_ioremap_resource(&pdev->dev, res);
1066 if (IS_ERR(audio_drv_data->acp_mmio))
1067 return PTR_ERR(audio_drv_data->acp_mmio);
1061 1068
1062 /* The following members gets populated in device 'open' 1069 /* The following members gets populated in device 'open'
1063 * function. Till then interrupts are disabled in 'acp_init' 1070 * function. Till then interrupts are disabled in 'acp_init'
diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig
index 4a56f3dfba51..dcee145dd179 100644
--- a/sound/soc/atmel/Kconfig
+++ b/sound/soc/atmel/Kconfig
@@ -64,7 +64,7 @@ config SND_AT91_SOC_SAM9X5_WM8731
64config SND_ATMEL_SOC_CLASSD 64config SND_ATMEL_SOC_CLASSD
65 tristate "Atmel ASoC driver for boards using CLASSD" 65 tristate "Atmel ASoC driver for boards using CLASSD"
66 depends on ARCH_AT91 || COMPILE_TEST 66 depends on ARCH_AT91 || COMPILE_TEST
67 select SND_ATMEL_SOC_DMA 67 select SND_SOC_GENERIC_DMAENGINE_PCM
68 select REGMAP_MMIO 68 select REGMAP_MMIO
69 help 69 help
70 Say Y if you want to add support for Atmel ASoC driver for boards using 70 Say Y if you want to add support for Atmel ASoC driver for boards using
diff --git a/sound/soc/codecs/da7218.c b/sound/soc/codecs/da7218.c
index b2d42ec1dcd9..56564ce90cb6 100644
--- a/sound/soc/codecs/da7218.c
+++ b/sound/soc/codecs/da7218.c
@@ -2520,7 +2520,7 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec)
2520 } 2520 }
2521 2521
2522 if (da7218->dev_id == DA7218_DEV_ID) { 2522 if (da7218->dev_id == DA7218_DEV_ID) {
2523 hpldet_np = of_find_node_by_name(np, "da7218_hpldet"); 2523 hpldet_np = of_get_child_by_name(np, "da7218_hpldet");
2524 if (!hpldet_np) 2524 if (!hpldet_np)
2525 return pdata; 2525 return pdata;
2526 2526
diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c
index 5f3c42c4f74a..066ea2f4ce7b 100644
--- a/sound/soc/codecs/msm8916-wcd-analog.c
+++ b/sound/soc/codecs/msm8916-wcd-analog.c
@@ -267,7 +267,7 @@
267#define MSM8916_WCD_ANALOG_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\ 267#define MSM8916_WCD_ANALOG_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\
268 SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000) 268 SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000)
269#define MSM8916_WCD_ANALOG_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ 269#define MSM8916_WCD_ANALOG_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
270 SNDRV_PCM_FMTBIT_S24_LE) 270 SNDRV_PCM_FMTBIT_S32_LE)
271 271
272static int btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 | 272static int btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 |
273 SND_JACK_BTN_2 | SND_JACK_BTN_3 | SND_JACK_BTN_4; 273 SND_JACK_BTN_2 | SND_JACK_BTN_3 | SND_JACK_BTN_4;
diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c
index a10a724eb448..13354d6304a8 100644
--- a/sound/soc/codecs/msm8916-wcd-digital.c
+++ b/sound/soc/codecs/msm8916-wcd-digital.c
@@ -194,7 +194,7 @@
194 SNDRV_PCM_RATE_32000 | \ 194 SNDRV_PCM_RATE_32000 | \
195 SNDRV_PCM_RATE_48000) 195 SNDRV_PCM_RATE_48000)
196#define MSM8916_WCD_DIGITAL_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ 196#define MSM8916_WCD_DIGITAL_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
197 SNDRV_PCM_FMTBIT_S24_LE) 197 SNDRV_PCM_FMTBIT_S32_LE)
198 198
199struct msm8916_wcd_digital_priv { 199struct msm8916_wcd_digital_priv {
200 struct clk *ahbclk, *mclk; 200 struct clk *ahbclk, *mclk;
@@ -645,7 +645,7 @@ static int msm8916_wcd_digital_hw_params(struct snd_pcm_substream *substream,
645 RX_I2S_CTL_RX_I2S_MODE_MASK, 645 RX_I2S_CTL_RX_I2S_MODE_MASK,
646 RX_I2S_CTL_RX_I2S_MODE_16); 646 RX_I2S_CTL_RX_I2S_MODE_16);
647 break; 647 break;
648 case SNDRV_PCM_FORMAT_S24_LE: 648 case SNDRV_PCM_FORMAT_S32_LE:
649 snd_soc_update_bits(dai->codec, LPASS_CDC_CLK_TX_I2S_CTL, 649 snd_soc_update_bits(dai->codec, LPASS_CDC_CLK_TX_I2S_CTL,
650 TX_I2S_CTL_TX_I2S_MODE_MASK, 650 TX_I2S_CTL_TX_I2S_MODE_MASK,
651 TX_I2S_CTL_TX_I2S_MODE_32); 651 TX_I2S_CTL_TX_I2S_MODE_32);
diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c
index 714ce17da717..e853a6dfd33b 100644
--- a/sound/soc/codecs/nau8825.c
+++ b/sound/soc/codecs/nau8825.c
@@ -905,6 +905,7 @@ static int nau8825_adc_event(struct snd_soc_dapm_widget *w,
905 905
906 switch (event) { 906 switch (event) {
907 case SND_SOC_DAPM_POST_PMU: 907 case SND_SOC_DAPM_POST_PMU:
908 msleep(125);
908 regmap_update_bits(nau8825->regmap, NAU8825_REG_ENA_CTRL, 909 regmap_update_bits(nau8825->regmap, NAU8825_REG_ENA_CTRL,
909 NAU8825_ENABLE_ADC, NAU8825_ENABLE_ADC); 910 NAU8825_ENABLE_ADC, NAU8825_ENABLE_ADC);
910 break; 911 break;
diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c
index 2df91db765ac..64bf26cec20d 100644
--- a/sound/soc/codecs/rt5514-spi.c
+++ b/sound/soc/codecs/rt5514-spi.c
@@ -289,6 +289,8 @@ static int rt5514_spi_pcm_probe(struct snd_soc_platform *platform)
289 dev_err(&rt5514_spi->dev, 289 dev_err(&rt5514_spi->dev,
290 "%s Failed to reguest IRQ: %d\n", __func__, 290 "%s Failed to reguest IRQ: %d\n", __func__,
291 ret); 291 ret);
292 else
293 device_init_wakeup(rt5514_dsp->dev, true);
292 } 294 }
293 295
294 return 0; 296 return 0;
@@ -456,8 +458,6 @@ static int rt5514_spi_probe(struct spi_device *spi)
456 return ret; 458 return ret;
457 } 459 }
458 460
459 device_init_wakeup(&spi->dev, true);
460
461 return 0; 461 return 0;
462} 462}
463 463
@@ -482,10 +482,13 @@ static int __maybe_unused rt5514_resume(struct device *dev)
482 if (device_may_wakeup(dev)) 482 if (device_may_wakeup(dev))
483 disable_irq_wake(irq); 483 disable_irq_wake(irq);
484 484
485 if (rt5514_dsp->substream) { 485 if (rt5514_dsp) {
486 rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, sizeof(buf)); 486 if (rt5514_dsp->substream) {
487 if (buf[0] & RT5514_IRQ_STATUS_BIT) 487 rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf,
488 rt5514_schedule_copy(rt5514_dsp); 488 sizeof(buf));
489 if (buf[0] & RT5514_IRQ_STATUS_BIT)
490 rt5514_schedule_copy(rt5514_dsp);
491 }
489 } 492 }
490 493
491 return 0; 494 return 0;
diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c
index 2a5b5d74e697..2dd6e9f990a4 100644
--- a/sound/soc/codecs/rt5514.c
+++ b/sound/soc/codecs/rt5514.c
@@ -496,7 +496,7 @@ static const struct snd_soc_dapm_widget rt5514_dapm_widgets[] = {
496 SND_SOC_DAPM_PGA("DMIC1", SND_SOC_NOPM, 0, 0, NULL, 0), 496 SND_SOC_DAPM_PGA("DMIC1", SND_SOC_NOPM, 0, 0, NULL, 0),
497 SND_SOC_DAPM_PGA("DMIC2", SND_SOC_NOPM, 0, 0, NULL, 0), 497 SND_SOC_DAPM_PGA("DMIC2", SND_SOC_NOPM, 0, 0, NULL, 0),
498 498
499 SND_SOC_DAPM_SUPPLY("DMIC CLK", SND_SOC_NOPM, 0, 0, 499 SND_SOC_DAPM_SUPPLY_S("DMIC CLK", 1, SND_SOC_NOPM, 0, 0,
500 rt5514_set_dmic_clk, SND_SOC_DAPM_PRE_PMU), 500 rt5514_set_dmic_clk, SND_SOC_DAPM_PRE_PMU),
501 501
502 SND_SOC_DAPM_SUPPLY("ADC CLK", RT5514_CLK_CTRL1, 502 SND_SOC_DAPM_SUPPLY("ADC CLK", RT5514_CLK_CTRL1,
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index f020d2d1eef4..edc152c8a1fe 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3823,6 +3823,8 @@ static int rt5645_i2c_probe(struct i2c_client *i2c,
3823 regmap_read(regmap, RT5645_VENDOR_ID, &val); 3823 regmap_read(regmap, RT5645_VENDOR_ID, &val);
3824 rt5645->v_id = val & 0xff; 3824 rt5645->v_id = val & 0xff;
3825 3825
3826 regmap_write(rt5645->regmap, RT5645_AD_DA_MIXER, 0x8080);
3827
3826 ret = regmap_register_patch(rt5645->regmap, init_list, 3828 ret = regmap_register_patch(rt5645->regmap, init_list,
3827 ARRAY_SIZE(init_list)); 3829 ARRAY_SIZE(init_list));
3828 if (ret != 0) 3830 if (ret != 0)
diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c
index b036c9dc0c8c..d329bf719d80 100644
--- a/sound/soc/codecs/rt5663.c
+++ b/sound/soc/codecs/rt5663.c
@@ -1560,6 +1560,10 @@ static int rt5663_jack_detect(struct snd_soc_codec *codec, int jack_insert)
1560 RT5663_IRQ_POW_SAV_MASK, RT5663_IRQ_POW_SAV_EN); 1560 RT5663_IRQ_POW_SAV_MASK, RT5663_IRQ_POW_SAV_EN);
1561 snd_soc_update_bits(codec, RT5663_IRQ_1, 1561 snd_soc_update_bits(codec, RT5663_IRQ_1,
1562 RT5663_EN_IRQ_JD1_MASK, RT5663_EN_IRQ_JD1_EN); 1562 RT5663_EN_IRQ_JD1_MASK, RT5663_EN_IRQ_JD1_EN);
1563 snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1,
1564 RT5663_EM_JD_MASK, RT5663_EM_JD_RST);
1565 snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1,
1566 RT5663_EM_JD_MASK, RT5663_EM_JD_NOR);
1563 1567
1564 while (true) { 1568 while (true) {
1565 regmap_read(rt5663->regmap, RT5663_INT_ST_2, &val); 1569 regmap_read(rt5663->regmap, RT5663_INT_ST_2, &val);
diff --git a/sound/soc/codecs/rt5663.h b/sound/soc/codecs/rt5663.h
index c5a9b69579ad..03adc8004ba9 100644
--- a/sound/soc/codecs/rt5663.h
+++ b/sound/soc/codecs/rt5663.h
@@ -1029,6 +1029,10 @@
1029#define RT5663_POL_EXT_JD_SHIFT 10 1029#define RT5663_POL_EXT_JD_SHIFT 10
1030#define RT5663_POL_EXT_JD_EN (0x1 << 10) 1030#define RT5663_POL_EXT_JD_EN (0x1 << 10)
1031#define RT5663_POL_EXT_JD_DIS (0x0 << 10) 1031#define RT5663_POL_EXT_JD_DIS (0x0 << 10)
1032#define RT5663_EM_JD_MASK (0x1 << 7)
1033#define RT5663_EM_JD_SHIFT 7
1034#define RT5663_EM_JD_NOR (0x1 << 7)
1035#define RT5663_EM_JD_RST (0x0 << 7)
1032 1036
1033/* DACREF LDO Control (0x0112)*/ 1037/* DACREF LDO Control (0x0112)*/
1034#define RT5663_PWR_LDO_DACREFL_MASK (0x1 << 9) 1038#define RT5663_PWR_LDO_DACREFL_MASK (0x1 << 9)
diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h
index 730fb2058869..1ff3edb7bbb6 100644
--- a/sound/soc/codecs/tlv320aic31xx.h
+++ b/sound/soc/codecs/tlv320aic31xx.h
@@ -116,7 +116,7 @@ struct aic31xx_pdata {
116/* INT2 interrupt control */ 116/* INT2 interrupt control */
117#define AIC31XX_INT2CTRL AIC31XX_REG(0, 49) 117#define AIC31XX_INT2CTRL AIC31XX_REG(0, 49)
118/* GPIO1 control */ 118/* GPIO1 control */
119#define AIC31XX_GPIO1 AIC31XX_REG(0, 50) 119#define AIC31XX_GPIO1 AIC31XX_REG(0, 51)
120 120
121#define AIC31XX_DACPRB AIC31XX_REG(0, 60) 121#define AIC31XX_DACPRB AIC31XX_REG(0, 60)
122/* ADC Instruction Set Register */ 122/* ADC Instruction Set Register */
diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index c482b2e7a7d2..cfe72b9d4356 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -232,7 +232,7 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
232 struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev); 232 struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev);
233 struct device_node *twl4030_codec_node = NULL; 233 struct device_node *twl4030_codec_node = NULL;
234 234
235 twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node, 235 twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node,
236 "codec"); 236 "codec");
237 237
238 if (!pdata && twl4030_codec_node) { 238 if (!pdata && twl4030_codec_node) {
@@ -241,9 +241,11 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
241 GFP_KERNEL); 241 GFP_KERNEL);
242 if (!pdata) { 242 if (!pdata) {
243 dev_err(codec->dev, "Can not allocate memory\n"); 243 dev_err(codec->dev, "Can not allocate memory\n");
244 of_node_put(twl4030_codec_node);
244 return NULL; 245 return NULL;
245 } 246 }
246 twl4030_setup_pdata_of(pdata, twl4030_codec_node); 247 twl4030_setup_pdata_of(pdata, twl4030_codec_node);
248 of_node_put(twl4030_codec_node);
247 } 249 }
248 250
249 return pdata; 251 return pdata;
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 65c059b5ffd7..66e32f5d2917 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -1733,7 +1733,7 @@ static int wm_adsp_load(struct wm_adsp *dsp)
1733 le64_to_cpu(footer->timestamp)); 1733 le64_to_cpu(footer->timestamp));
1734 1734
1735 while (pos < firmware->size && 1735 while (pos < firmware->size &&
1736 pos - firmware->size > sizeof(*region)) { 1736 sizeof(*region) < firmware->size - pos) {
1737 region = (void *)&(firmware->data[pos]); 1737 region = (void *)&(firmware->data[pos]);
1738 region_name = "Unknown"; 1738 region_name = "Unknown";
1739 reg = 0; 1739 reg = 0;
@@ -1782,8 +1782,8 @@ static int wm_adsp_load(struct wm_adsp *dsp)
1782 regions, le32_to_cpu(region->len), offset, 1782 regions, le32_to_cpu(region->len), offset,
1783 region_name); 1783 region_name);
1784 1784
1785 if ((pos + le32_to_cpu(region->len) + sizeof(*region)) > 1785 if (le32_to_cpu(region->len) >
1786 firmware->size) { 1786 firmware->size - pos - sizeof(*region)) {
1787 adsp_err(dsp, 1787 adsp_err(dsp,
1788 "%s.%d: %s region len %d bytes exceeds file length %zu\n", 1788 "%s.%d: %s region len %d bytes exceeds file length %zu\n",
1789 file, regions, region_name, 1789 file, regions, region_name,
@@ -2253,7 +2253,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
2253 2253
2254 blocks = 0; 2254 blocks = 0;
2255 while (pos < firmware->size && 2255 while (pos < firmware->size &&
2256 pos - firmware->size > sizeof(*blk)) { 2256 sizeof(*blk) < firmware->size - pos) {
2257 blk = (void *)(&firmware->data[pos]); 2257 blk = (void *)(&firmware->data[pos]);
2258 2258
2259 type = le16_to_cpu(blk->type); 2259 type = le16_to_cpu(blk->type);
@@ -2327,8 +2327,8 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
2327 } 2327 }
2328 2328
2329 if (reg) { 2329 if (reg) {
2330 if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) > 2330 if (le32_to_cpu(blk->len) >
2331 firmware->size) { 2331 firmware->size - pos - sizeof(*blk)) {
2332 adsp_err(dsp, 2332 adsp_err(dsp,
2333 "%s.%d: %s region len %d bytes exceeds file length %zu\n", 2333 "%s.%d: %s region len %d bytes exceeds file length %zu\n",
2334 file, blocks, region_name, 2334 file, blocks, region_name,
diff --git a/sound/soc/fsl/fsl_asrc.h b/sound/soc/fsl/fsl_asrc.h
index 0f163abe4ba3..52c27a358933 100644
--- a/sound/soc/fsl/fsl_asrc.h
+++ b/sound/soc/fsl/fsl_asrc.h
@@ -260,8 +260,8 @@
260#define ASRFSTi_OUTPUT_FIFO_SHIFT 12 260#define ASRFSTi_OUTPUT_FIFO_SHIFT 12
261#define ASRFSTi_OUTPUT_FIFO_MASK (((1 << ASRFSTi_OUTPUT_FIFO_WIDTH) - 1) << ASRFSTi_OUTPUT_FIFO_SHIFT) 261#define ASRFSTi_OUTPUT_FIFO_MASK (((1 << ASRFSTi_OUTPUT_FIFO_WIDTH) - 1) << ASRFSTi_OUTPUT_FIFO_SHIFT)
262#define ASRFSTi_IAEi_SHIFT 11 262#define ASRFSTi_IAEi_SHIFT 11
263#define ASRFSTi_IAEi_MASK (1 << ASRFSTi_OAFi_SHIFT) 263#define ASRFSTi_IAEi_MASK (1 << ASRFSTi_IAEi_SHIFT)
264#define ASRFSTi_IAEi (1 << ASRFSTi_OAFi_SHIFT) 264#define ASRFSTi_IAEi (1 << ASRFSTi_IAEi_SHIFT)
265#define ASRFSTi_INPUT_FIFO_WIDTH 7 265#define ASRFSTi_INPUT_FIFO_WIDTH 7
266#define ASRFSTi_INPUT_FIFO_SHIFT 0 266#define ASRFSTi_INPUT_FIFO_SHIFT 0
267#define ASRFSTi_INPUT_FIFO_MASK ((1 << ASRFSTi_INPUT_FIFO_WIDTH) - 1) 267#define ASRFSTi_INPUT_FIFO_MASK ((1 << ASRFSTi_INPUT_FIFO_WIDTH) - 1)
diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index f2f51e06e22c..424bafaf51ef 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -38,6 +38,7 @@
38#include <linux/ctype.h> 38#include <linux/ctype.h>
39#include <linux/device.h> 39#include <linux/device.h>
40#include <linux/delay.h> 40#include <linux/delay.h>
41#include <linux/mutex.h>
41#include <linux/slab.h> 42#include <linux/slab.h>
42#include <linux/spinlock.h> 43#include <linux/spinlock.h>
43#include <linux/of.h> 44#include <linux/of.h>
@@ -265,6 +266,8 @@ struct fsl_ssi_private {
265 266
266 u32 fifo_watermark; 267 u32 fifo_watermark;
267 u32 dma_maxburst; 268 u32 dma_maxburst;
269
270 struct mutex ac97_reg_lock;
268}; 271};
269 272
270/* 273/*
@@ -1260,11 +1263,13 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
1260 if (reg > 0x7f) 1263 if (reg > 0x7f)
1261 return; 1264 return;
1262 1265
1266 mutex_lock(&fsl_ac97_data->ac97_reg_lock);
1267
1263 ret = clk_prepare_enable(fsl_ac97_data->clk); 1268 ret = clk_prepare_enable(fsl_ac97_data->clk);
1264 if (ret) { 1269 if (ret) {
1265 pr_err("ac97 write clk_prepare_enable failed: %d\n", 1270 pr_err("ac97 write clk_prepare_enable failed: %d\n",
1266 ret); 1271 ret);
1267 return; 1272 goto ret_unlock;
1268 } 1273 }
1269 1274
1270 lreg = reg << 12; 1275 lreg = reg << 12;
@@ -1278,6 +1283,9 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
1278 udelay(100); 1283 udelay(100);
1279 1284
1280 clk_disable_unprepare(fsl_ac97_data->clk); 1285 clk_disable_unprepare(fsl_ac97_data->clk);
1286
1287ret_unlock:
1288 mutex_unlock(&fsl_ac97_data->ac97_reg_lock);
1281} 1289}
1282 1290
1283static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, 1291static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
@@ -1285,16 +1293,18 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
1285{ 1293{
1286 struct regmap *regs = fsl_ac97_data->regs; 1294 struct regmap *regs = fsl_ac97_data->regs;
1287 1295
1288 unsigned short val = -1; 1296 unsigned short val = 0;
1289 u32 reg_val; 1297 u32 reg_val;
1290 unsigned int lreg; 1298 unsigned int lreg;
1291 int ret; 1299 int ret;
1292 1300
1301 mutex_lock(&fsl_ac97_data->ac97_reg_lock);
1302
1293 ret = clk_prepare_enable(fsl_ac97_data->clk); 1303 ret = clk_prepare_enable(fsl_ac97_data->clk);
1294 if (ret) { 1304 if (ret) {
1295 pr_err("ac97 read clk_prepare_enable failed: %d\n", 1305 pr_err("ac97 read clk_prepare_enable failed: %d\n",
1296 ret); 1306 ret);
1297 return -1; 1307 goto ret_unlock;
1298 } 1308 }
1299 1309
1300 lreg = (reg & 0x7f) << 12; 1310 lreg = (reg & 0x7f) << 12;
@@ -1309,6 +1319,8 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
1309 1319
1310 clk_disable_unprepare(fsl_ac97_data->clk); 1320 clk_disable_unprepare(fsl_ac97_data->clk);
1311 1321
1322ret_unlock:
1323 mutex_unlock(&fsl_ac97_data->ac97_reg_lock);
1312 return val; 1324 return val;
1313} 1325}
1314 1326
@@ -1458,12 +1470,6 @@ static int fsl_ssi_probe(struct platform_device *pdev)
1458 sizeof(fsl_ssi_ac97_dai)); 1470 sizeof(fsl_ssi_ac97_dai));
1459 1471
1460 fsl_ac97_data = ssi_private; 1472 fsl_ac97_data = ssi_private;
1461
1462 ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
1463 if (ret) {
1464 dev_err(&pdev->dev, "could not set AC'97 ops\n");
1465 return ret;
1466 }
1467 } else { 1473 } else {
1468 /* Initialize this copy of the CPU DAI driver structure */ 1474 /* Initialize this copy of the CPU DAI driver structure */
1469 memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template, 1475 memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template,
@@ -1574,6 +1580,15 @@ static int fsl_ssi_probe(struct platform_device *pdev)
1574 return ret; 1580 return ret;
1575 } 1581 }
1576 1582
1583 if (fsl_ssi_is_ac97(ssi_private)) {
1584 mutex_init(&ssi_private->ac97_reg_lock);
1585 ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
1586 if (ret) {
1587 dev_err(&pdev->dev, "could not set AC'97 ops\n");
1588 goto error_ac97_ops;
1589 }
1590 }
1591
1577 ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component, 1592 ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component,
1578 &ssi_private->cpu_dai_drv, 1); 1593 &ssi_private->cpu_dai_drv, 1);
1579 if (ret) { 1594 if (ret) {
@@ -1657,6 +1672,13 @@ error_sound_card:
1657 fsl_ssi_debugfs_remove(&ssi_private->dbg_stats); 1672 fsl_ssi_debugfs_remove(&ssi_private->dbg_stats);
1658 1673
1659error_asoc_register: 1674error_asoc_register:
1675 if (fsl_ssi_is_ac97(ssi_private))
1676 snd_soc_set_ac97_ops(NULL);
1677
1678error_ac97_ops:
1679 if (fsl_ssi_is_ac97(ssi_private))
1680 mutex_destroy(&ssi_private->ac97_reg_lock);
1681
1660 if (ssi_private->soc->imx) 1682 if (ssi_private->soc->imx)
1661 fsl_ssi_imx_clean(pdev, ssi_private); 1683 fsl_ssi_imx_clean(pdev, ssi_private);
1662 1684
@@ -1675,8 +1697,10 @@ static int fsl_ssi_remove(struct platform_device *pdev)
1675 if (ssi_private->soc->imx) 1697 if (ssi_private->soc->imx)
1676 fsl_ssi_imx_clean(pdev, ssi_private); 1698 fsl_ssi_imx_clean(pdev, ssi_private);
1677 1699
1678 if (fsl_ssi_is_ac97(ssi_private)) 1700 if (fsl_ssi_is_ac97(ssi_private)) {
1679 snd_soc_set_ac97_ops(NULL); 1701 snd_soc_set_ac97_ops(NULL);
1702 mutex_destroy(&ssi_private->ac97_reg_lock);
1703 }
1680 1704
1681 return 0; 1705 return 0;
1682} 1706}
diff --git a/sound/soc/intel/boards/kbl_rt5663_max98927.c b/sound/soc/intel/boards/kbl_rt5663_max98927.c
index 6f9a8bcf20f3..6dcad0a8a0d0 100644
--- a/sound/soc/intel/boards/kbl_rt5663_max98927.c
+++ b/sound/soc/intel/boards/kbl_rt5663_max98927.c
@@ -101,7 +101,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = {
101 { "ssp0 Tx", NULL, "spk_out" }, 101 { "ssp0 Tx", NULL, "spk_out" },
102 102
103 { "AIF Playback", NULL, "ssp1 Tx" }, 103 { "AIF Playback", NULL, "ssp1 Tx" },
104 { "ssp1 Tx", NULL, "hs_out" }, 104 { "ssp1 Tx", NULL, "codec1_out" },
105 105
106 { "hs_in", NULL, "ssp1 Rx" }, 106 { "hs_in", NULL, "ssp1 Rx" },
107 { "ssp1 Rx", NULL, "AIF Capture" }, 107 { "ssp1 Rx", NULL, "AIF Capture" },
diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
index 6072164f2d43..271ae3c2c535 100644
--- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
+++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
@@ -109,7 +109,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = {
109 { "ssp0 Tx", NULL, "spk_out" }, 109 { "ssp0 Tx", NULL, "spk_out" },
110 110
111 { "AIF Playback", NULL, "ssp1 Tx" }, 111 { "AIF Playback", NULL, "ssp1 Tx" },
112 { "ssp1 Tx", NULL, "hs_out" }, 112 { "ssp1 Tx", NULL, "codec1_out" },
113 113
114 { "hs_in", NULL, "ssp1 Rx" }, 114 { "hs_in", NULL, "ssp1 Rx" },
115 { "ssp1 Rx", NULL, "AIF Capture" }, 115 { "ssp1 Rx", NULL, "AIF Capture" },
diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c
index d14c50a60289..3eaac41090ca 100644
--- a/sound/soc/intel/skylake/skl-nhlt.c
+++ b/sound/soc/intel/skylake/skl-nhlt.c
@@ -119,11 +119,16 @@ static bool skl_check_ep_match(struct device *dev, struct nhlt_endpoint *epnt,
119 119
120 if ((epnt->virtual_bus_id == instance_id) && 120 if ((epnt->virtual_bus_id == instance_id) &&
121 (epnt->linktype == link_type) && 121 (epnt->linktype == link_type) &&
122 (epnt->direction == dirn) && 122 (epnt->direction == dirn)) {
123 (epnt->device_type == dev_type)) 123 /* do not check dev_type for DMIC link type */
124 return true; 124 if (epnt->linktype == NHLT_LINK_DMIC)
125 else 125 return true;
126 return false; 126
127 if (epnt->device_type == dev_type)
128 return true;
129 }
130
131 return false;
127} 132}
128 133
129struct nhlt_specific_cfg 134struct nhlt_specific_cfg
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index a072bcf209d2..81923da18ac2 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -2908,7 +2908,7 @@ static int skl_tplg_control_load(struct snd_soc_component *cmpnt,
2908 break; 2908 break;
2909 2909
2910 default: 2910 default:
2911 dev_warn(bus->dev, "Control load not supported %d:%d:%d\n", 2911 dev_dbg(bus->dev, "Control load not supported %d:%d:%d\n",
2912 hdr->ops.get, hdr->ops.put, hdr->ops.info); 2912 hdr->ops.get, hdr->ops.put, hdr->ops.info);
2913 break; 2913 break;
2914 } 2914 }
diff --git a/sound/soc/rockchip/rockchip_spdif.c b/sound/soc/rockchip/rockchip_spdif.c
index ee5055d47d13..a89fe9b6463b 100644
--- a/sound/soc/rockchip/rockchip_spdif.c
+++ b/sound/soc/rockchip/rockchip_spdif.c
@@ -322,26 +322,30 @@ static int rk_spdif_probe(struct platform_device *pdev)
322 spdif->mclk = devm_clk_get(&pdev->dev, "mclk"); 322 spdif->mclk = devm_clk_get(&pdev->dev, "mclk");
323 if (IS_ERR(spdif->mclk)) { 323 if (IS_ERR(spdif->mclk)) {
324 dev_err(&pdev->dev, "Can't retrieve rk_spdif master clock\n"); 324 dev_err(&pdev->dev, "Can't retrieve rk_spdif master clock\n");
325 return PTR_ERR(spdif->mclk); 325 ret = PTR_ERR(spdif->mclk);
326 goto err_disable_hclk;
326 } 327 }
327 328
328 ret = clk_prepare_enable(spdif->mclk); 329 ret = clk_prepare_enable(spdif->mclk);
329 if (ret) { 330 if (ret) {
330 dev_err(spdif->dev, "clock enable failed %d\n", ret); 331 dev_err(spdif->dev, "clock enable failed %d\n", ret);
331 return ret; 332 goto err_disable_clocks;
332 } 333 }
333 334
334 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 335 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
335 regs = devm_ioremap_resource(&pdev->dev, res); 336 regs = devm_ioremap_resource(&pdev->dev, res);
336 if (IS_ERR(regs)) 337 if (IS_ERR(regs)) {
337 return PTR_ERR(regs); 338 ret = PTR_ERR(regs);
339 goto err_disable_clocks;
340 }
338 341
339 spdif->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "hclk", regs, 342 spdif->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "hclk", regs,
340 &rk_spdif_regmap_config); 343 &rk_spdif_regmap_config);
341 if (IS_ERR(spdif->regmap)) { 344 if (IS_ERR(spdif->regmap)) {
342 dev_err(&pdev->dev, 345 dev_err(&pdev->dev,
343 "Failed to initialise managed register map\n"); 346 "Failed to initialise managed register map\n");
344 return PTR_ERR(spdif->regmap); 347 ret = PTR_ERR(spdif->regmap);
348 goto err_disable_clocks;
345 } 349 }
346 350
347 spdif->playback_dma_data.addr = res->start + SPDIF_SMPDR; 351 spdif->playback_dma_data.addr = res->start + SPDIF_SMPDR;
@@ -373,6 +377,10 @@ static int rk_spdif_probe(struct platform_device *pdev)
373 377
374err_pm_runtime: 378err_pm_runtime:
375 pm_runtime_disable(&pdev->dev); 379 pm_runtime_disable(&pdev->dev);
380err_disable_clocks:
381 clk_disable_unprepare(spdif->mclk);
382err_disable_hclk:
383 clk_disable_unprepare(spdif->hclk);
376 384
377 return ret; 385 return ret;
378} 386}
diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c
index 8ddb08714faa..4672688cac32 100644
--- a/sound/soc/sh/rcar/adg.c
+++ b/sound/soc/sh/rcar/adg.c
@@ -222,7 +222,7 @@ int rsnd_adg_set_cmd_timsel_gen2(struct rsnd_mod *cmd_mod,
222 NULL, &val, NULL); 222 NULL, &val, NULL);
223 223
224 val = val << shift; 224 val = val << shift;
225 mask = 0xffff << shift; 225 mask = 0x0f1f << shift;
226 226
227 rsnd_mod_bset(adg_mod, CMDOUT_TIMSEL, mask, val); 227 rsnd_mod_bset(adg_mod, CMDOUT_TIMSEL, mask, val);
228 228
@@ -250,7 +250,7 @@ int rsnd_adg_set_src_timesel_gen2(struct rsnd_mod *src_mod,
250 250
251 in = in << shift; 251 in = in << shift;
252 out = out << shift; 252 out = out << shift;
253 mask = 0xffff << shift; 253 mask = 0x0f1f << shift;
254 254
255 switch (id / 2) { 255 switch (id / 2) {
256 case 0: 256 case 0:
@@ -380,7 +380,7 @@ int rsnd_adg_ssi_clk_try_start(struct rsnd_mod *ssi_mod, unsigned int rate)
380 ckr = 0x80000000; 380 ckr = 0x80000000;
381 } 381 }
382 382
383 rsnd_mod_bset(adg_mod, BRGCKR, 0x80FF0000, adg->ckr | ckr); 383 rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr | ckr);
384 rsnd_mod_write(adg_mod, BRRA, adg->rbga); 384 rsnd_mod_write(adg_mod, BRRA, adg->rbga);
385 rsnd_mod_write(adg_mod, BRRB, adg->rbgb); 385 rsnd_mod_write(adg_mod, BRRB, adg->rbgb);
386 386
diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c
index c70eb2097816..f12a88a21dfa 100644
--- a/sound/soc/sh/rcar/core.c
+++ b/sound/soc/sh/rcar/core.c
@@ -1332,8 +1332,8 @@ static int rsnd_pcm_new(struct snd_soc_pcm_runtime *rtd)
1332 1332
1333 return snd_pcm_lib_preallocate_pages_for_all( 1333 return snd_pcm_lib_preallocate_pages_for_all(
1334 rtd->pcm, 1334 rtd->pcm,
1335 SNDRV_DMA_TYPE_CONTINUOUS, 1335 SNDRV_DMA_TYPE_DEV,
1336 snd_dma_continuous_data(GFP_KERNEL), 1336 rtd->card->snd_card->dev,
1337 PREALLOC_BUFFER, PREALLOC_BUFFER_MAX); 1337 PREALLOC_BUFFER, PREALLOC_BUFFER_MAX);
1338} 1338}
1339 1339
diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c
index fd557abfe390..4d750bdf8e24 100644
--- a/sound/soc/sh/rcar/dma.c
+++ b/sound/soc/sh/rcar/dma.c
@@ -26,10 +26,7 @@
26struct rsnd_dmaen { 26struct rsnd_dmaen {
27 struct dma_chan *chan; 27 struct dma_chan *chan;
28 dma_cookie_t cookie; 28 dma_cookie_t cookie;
29 dma_addr_t dma_buf;
30 unsigned int dma_len; 29 unsigned int dma_len;
31 unsigned int dma_period;
32 unsigned int dma_cnt;
33}; 30};
34 31
35struct rsnd_dmapp { 32struct rsnd_dmapp {
@@ -71,38 +68,10 @@ static struct rsnd_mod mem = {
71/* 68/*
72 * Audio DMAC 69 * Audio DMAC
73 */ 70 */
74#define rsnd_dmaen_sync(dmaen, io, i) __rsnd_dmaen_sync(dmaen, io, i, 1)
75#define rsnd_dmaen_unsync(dmaen, io, i) __rsnd_dmaen_sync(dmaen, io, i, 0)
76static void __rsnd_dmaen_sync(struct rsnd_dmaen *dmaen, struct rsnd_dai_stream *io,
77 int i, int sync)
78{
79 struct device *dev = dmaen->chan->device->dev;
80 enum dma_data_direction dir;
81 int is_play = rsnd_io_is_play(io);
82 dma_addr_t buf;
83 int len, max;
84 size_t period;
85
86 len = dmaen->dma_len;
87 period = dmaen->dma_period;
88 max = len / period;
89 i = i % max;
90 buf = dmaen->dma_buf + (period * i);
91
92 dir = is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
93
94 if (sync)
95 dma_sync_single_for_device(dev, buf, period, dir);
96 else
97 dma_sync_single_for_cpu(dev, buf, period, dir);
98}
99
100static void __rsnd_dmaen_complete(struct rsnd_mod *mod, 71static void __rsnd_dmaen_complete(struct rsnd_mod *mod,
101 struct rsnd_dai_stream *io) 72 struct rsnd_dai_stream *io)
102{ 73{
103 struct rsnd_priv *priv = rsnd_mod_to_priv(mod); 74 struct rsnd_priv *priv = rsnd_mod_to_priv(mod);
104 struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
105 struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
106 bool elapsed = false; 75 bool elapsed = false;
107 unsigned long flags; 76 unsigned long flags;
108 77
@@ -115,22 +84,9 @@ static void __rsnd_dmaen_complete(struct rsnd_mod *mod,
115 */ 84 */
116 spin_lock_irqsave(&priv->lock, flags); 85 spin_lock_irqsave(&priv->lock, flags);
117 86
118 if (rsnd_io_is_working(io)) { 87 if (rsnd_io_is_working(io))
119 rsnd_dmaen_unsync(dmaen, io, dmaen->dma_cnt);
120
121 /*
122 * Next period is already started.
123 * Let's sync Next Next period
124 * see
125 * rsnd_dmaen_start()
126 */
127 rsnd_dmaen_sync(dmaen, io, dmaen->dma_cnt + 2);
128
129 elapsed = true; 88 elapsed = true;
130 89
131 dmaen->dma_cnt++;
132 }
133
134 spin_unlock_irqrestore(&priv->lock, flags); 90 spin_unlock_irqrestore(&priv->lock, flags);
135 91
136 if (elapsed) 92 if (elapsed)
@@ -165,14 +121,8 @@ static int rsnd_dmaen_stop(struct rsnd_mod *mod,
165 struct rsnd_dma *dma = rsnd_mod_to_dma(mod); 121 struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
166 struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma); 122 struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
167 123
168 if (dmaen->chan) { 124 if (dmaen->chan)
169 int is_play = rsnd_io_is_play(io);
170
171 dmaengine_terminate_all(dmaen->chan); 125 dmaengine_terminate_all(dmaen->chan);
172 dma_unmap_single(dmaen->chan->device->dev,
173 dmaen->dma_buf, dmaen->dma_len,
174 is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
175 }
176 126
177 return 0; 127 return 0;
178} 128}
@@ -237,11 +187,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
237 struct device *dev = rsnd_priv_to_dev(priv); 187 struct device *dev = rsnd_priv_to_dev(priv);
238 struct dma_async_tx_descriptor *desc; 188 struct dma_async_tx_descriptor *desc;
239 struct dma_slave_config cfg = {}; 189 struct dma_slave_config cfg = {};
240 dma_addr_t buf;
241 size_t len;
242 size_t period;
243 int is_play = rsnd_io_is_play(io); 190 int is_play = rsnd_io_is_play(io);
244 int i;
245 int ret; 191 int ret;
246 192
247 cfg.direction = is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM; 193 cfg.direction = is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
@@ -258,19 +204,10 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
258 if (ret < 0) 204 if (ret < 0)
259 return ret; 205 return ret;
260 206
261 len = snd_pcm_lib_buffer_bytes(substream);
262 period = snd_pcm_lib_period_bytes(substream);
263 buf = dma_map_single(dmaen->chan->device->dev,
264 substream->runtime->dma_area,
265 len,
266 is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
267 if (dma_mapping_error(dmaen->chan->device->dev, buf)) {
268 dev_err(dev, "dma map failed\n");
269 return -EIO;
270 }
271
272 desc = dmaengine_prep_dma_cyclic(dmaen->chan, 207 desc = dmaengine_prep_dma_cyclic(dmaen->chan,
273 buf, len, period, 208 substream->runtime->dma_addr,
209 snd_pcm_lib_buffer_bytes(substream),
210 snd_pcm_lib_period_bytes(substream),
274 is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM, 211 is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
275 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 212 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
276 213
@@ -282,18 +219,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
282 desc->callback = rsnd_dmaen_complete; 219 desc->callback = rsnd_dmaen_complete;
283 desc->callback_param = rsnd_mod_get(dma); 220 desc->callback_param = rsnd_mod_get(dma);
284 221
285 dmaen->dma_buf = buf; 222 dmaen->dma_len = snd_pcm_lib_buffer_bytes(substream);
286 dmaen->dma_len = len;
287 dmaen->dma_period = period;
288 dmaen->dma_cnt = 0;
289
290 /*
291 * synchronize this and next period
292 * see
293 * __rsnd_dmaen_complete()
294 */
295 for (i = 0; i < 2; i++)
296 rsnd_dmaen_sync(dmaen, io, i);
297 223
298 dmaen->cookie = dmaengine_submit(desc); 224 dmaen->cookie = dmaengine_submit(desc);
299 if (dmaen->cookie < 0) { 225 if (dmaen->cookie < 0) {
diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c
index fece1e5f582f..cbf3bf312d23 100644
--- a/sound/soc/sh/rcar/ssi.c
+++ b/sound/soc/sh/rcar/ssi.c
@@ -446,25 +446,29 @@ static bool rsnd_ssi_pointer_update(struct rsnd_mod *mod,
446 int byte) 446 int byte)
447{ 447{
448 struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); 448 struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
449 bool ret = false;
450 int byte_pos;
449 451
450 ssi->byte_pos += byte; 452 byte_pos = ssi->byte_pos + byte;
451 453
452 if (ssi->byte_pos >= ssi->next_period_byte) { 454 if (byte_pos >= ssi->next_period_byte) {
453 struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); 455 struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
454 456
455 ssi->period_pos++; 457 ssi->period_pos++;
456 ssi->next_period_byte += ssi->byte_per_period; 458 ssi->next_period_byte += ssi->byte_per_period;
457 459
458 if (ssi->period_pos >= runtime->periods) { 460 if (ssi->period_pos >= runtime->periods) {
459 ssi->byte_pos = 0; 461 byte_pos = 0;
460 ssi->period_pos = 0; 462 ssi->period_pos = 0;
461 ssi->next_period_byte = ssi->byte_per_period; 463 ssi->next_period_byte = ssi->byte_per_period;
462 } 464 }
463 465
464 return true; 466 ret = true;
465 } 467 }
466 468
467 return false; 469 WRITE_ONCE(ssi->byte_pos, byte_pos);
470
471 return ret;
468} 472}
469 473
470/* 474/*
@@ -838,7 +842,7 @@ static int rsnd_ssi_pointer(struct rsnd_mod *mod,
838 struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); 842 struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
839 struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); 843 struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
840 844
841 *pointer = bytes_to_frames(runtime, ssi->byte_pos); 845 *pointer = bytes_to_frames(runtime, READ_ONCE(ssi->byte_pos));
842 846
843 return 0; 847 return 0;
844} 848}
diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c
index 4d948757d300..6ff8a36c2c82 100644
--- a/sound/soc/sh/rcar/ssiu.c
+++ b/sound/soc/sh/rcar/ssiu.c
@@ -125,6 +125,7 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
125{ 125{
126 int hdmi = rsnd_ssi_hdmi_port(io); 126 int hdmi = rsnd_ssi_hdmi_port(io);
127 int ret; 127 int ret;
128 u32 mode = 0;
128 129
129 ret = rsnd_ssiu_init(mod, io, priv); 130 ret = rsnd_ssiu_init(mod, io, priv);
130 if (ret < 0) 131 if (ret < 0)
@@ -136,9 +137,11 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
136 * see 137 * see
137 * rsnd_ssi_config_init() 138 * rsnd_ssi_config_init()
138 */ 139 */
139 rsnd_mod_write(mod, SSI_MODE, 0x1); 140 mode = 0x1;
140 } 141 }
141 142
143 rsnd_mod_write(mod, SSI_MODE, mode);
144
142 if (rsnd_ssi_use_busif(io)) { 145 if (rsnd_ssi_use_busif(io)) {
143 rsnd_mod_write(mod, SSI_BUSIF_ADINR, 146 rsnd_mod_write(mod, SSI_BUSIF_ADINR,
144 rsnd_get_adinr_bit(mod, io) | 147 rsnd_get_adinr_bit(mod, io) |
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 7c9e361b2200..2b4ceda36291 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -2173,20 +2173,25 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid,
2173 kctl->private_value = (unsigned long)namelist; 2173 kctl->private_value = (unsigned long)namelist;
2174 kctl->private_free = usb_mixer_selector_elem_free; 2174 kctl->private_free = usb_mixer_selector_elem_free;
2175 2175
2176 nameid = uac_selector_unit_iSelector(desc); 2176 /* check the static mapping table at first */
2177 len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name)); 2177 len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
2178 if (len)
2179 ;
2180 else if (nameid)
2181 len = snd_usb_copy_string_desc(state, nameid, kctl->id.name,
2182 sizeof(kctl->id.name));
2183 else
2184 len = get_term_name(state, &state->oterm,
2185 kctl->id.name, sizeof(kctl->id.name), 0);
2186
2187 if (!len) { 2178 if (!len) {
2188 strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name)); 2179 /* no mapping ? */
2180 /* if iSelector is given, use it */
2181 nameid = uac_selector_unit_iSelector(desc);
2182 if (nameid)
2183 len = snd_usb_copy_string_desc(state, nameid,
2184 kctl->id.name,
2185 sizeof(kctl->id.name));
2186 /* ... or pick up the terminal name at next */
2187 if (!len)
2188 len = get_term_name(state, &state->oterm,
2189 kctl->id.name, sizeof(kctl->id.name), 0);
2190 /* ... or use the fixed string "USB" as the last resort */
2191 if (!len)
2192 strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
2189 2193
2194 /* and add the proper suffix */
2190 if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR) 2195 if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR)
2191 append_ctl_name(kctl, " Clock Source"); 2196 append_ctl_name(kctl, " Clock Source");
2192 else if ((state->oterm.type & 0xff00) == 0x0100) 2197 else if ((state->oterm.type & 0xff00) == 0x0100)
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 77eecaa4db1f..a66ef5777887 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1166,10 +1166,11 @@ static bool is_marantz_denon_dac(unsigned int id)
1166/* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch 1166/* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch
1167 * between PCM/DOP and native DSD mode 1167 * between PCM/DOP and native DSD mode
1168 */ 1168 */
1169static bool is_teac_50X_dac(unsigned int id) 1169static bool is_teac_dsd_dac(unsigned int id)
1170{ 1170{
1171 switch (id) { 1171 switch (id) {
1172 case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */ 1172 case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
1173 case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */
1173 return true; 1174 return true;
1174 } 1175 }
1175 return false; 1176 return false;
@@ -1202,7 +1203,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs,
1202 break; 1203 break;
1203 } 1204 }
1204 mdelay(20); 1205 mdelay(20);
1205 } else if (is_teac_50X_dac(subs->stream->chip->usb_id)) { 1206 } else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) {
1206 /* Vendor mode switch cmd is required. */ 1207 /* Vendor mode switch cmd is required. */
1207 switch (fmt->altsetting) { 1208 switch (fmt->altsetting) {
1208 case 3: /* DSD mode (DSD_U32) requested */ 1209 case 3: /* DSD mode (DSD_U32) requested */
@@ -1392,7 +1393,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
1392 } 1393 }
1393 1394
1394 /* TEAC devices with USB DAC functionality */ 1395 /* TEAC devices with USB DAC functionality */
1395 if (is_teac_50X_dac(chip->usb_id)) { 1396 if (is_teac_dsd_dac(chip->usb_id)) {
1396 if (fp->altsetting == 3) 1397 if (fp->altsetting == 3)
1397 return SNDRV_PCM_FMTBIT_DSD_U32_BE; 1398 return SNDRV_PCM_FMTBIT_DSD_U32_BE;
1398 } 1399 }
diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
index cefe7c7cd4f6..0a8e37a519f2 100644
--- a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
+++ b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
@@ -2,7 +2,7 @@
2#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ 2#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
3#define _UAPI__ASM_BPF_PERF_EVENT_H__ 3#define _UAPI__ASM_BPF_PERF_EVENT_H__
4 4
5#include <asm/ptrace.h> 5#include "ptrace.h"
6 6
7typedef user_pt_regs bpf_user_pt_regs_t; 7typedef user_pt_regs bpf_user_pt_regs_t;
8 8
diff --git a/tools/arch/s390/include/uapi/asm/perf_regs.h b/tools/arch/s390/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000000..d17dd9e5d516
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/perf_regs.h
@@ -0,0 +1,44 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2#ifndef _ASM_S390_PERF_REGS_H
3#define _ASM_S390_PERF_REGS_H
4
5enum perf_event_s390_regs {
6 PERF_REG_S390_R0,
7 PERF_REG_S390_R1,
8 PERF_REG_S390_R2,
9 PERF_REG_S390_R3,
10 PERF_REG_S390_R4,
11 PERF_REG_S390_R5,
12 PERF_REG_S390_R6,
13 PERF_REG_S390_R7,
14 PERF_REG_S390_R8,
15 PERF_REG_S390_R9,
16 PERF_REG_S390_R10,
17 PERF_REG_S390_R11,
18 PERF_REG_S390_R12,
19 PERF_REG_S390_R13,
20 PERF_REG_S390_R14,
21 PERF_REG_S390_R15,
22 PERF_REG_S390_FP0,
23 PERF_REG_S390_FP1,
24 PERF_REG_S390_FP2,
25 PERF_REG_S390_FP3,
26 PERF_REG_S390_FP4,
27 PERF_REG_S390_FP5,
28 PERF_REG_S390_FP6,
29 PERF_REG_S390_FP7,
30 PERF_REG_S390_FP8,
31 PERF_REG_S390_FP9,
32 PERF_REG_S390_FP10,
33 PERF_REG_S390_FP11,
34 PERF_REG_S390_FP12,
35 PERF_REG_S390_FP13,
36 PERF_REG_S390_FP14,
37 PERF_REG_S390_FP15,
38 PERF_REG_S390_MASK,
39 PERF_REG_S390_PC,
40
41 PERF_REG_S390_MAX
42};
43
44#endif /* _ASM_S390_PERF_REGS_H */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index c0b0e9e8aa66..800104c8a3ed 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -266,6 +266,7 @@
266/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ 266/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
267#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ 267#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
268#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ 268#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
269#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
269 270
270/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ 271/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
271#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ 272#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index e2450c8e88e6..a8c3a33dd185 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -523,21 +523,23 @@ static int do_show(int argc, char **argv)
523 break; 523 break;
524 p_err("can't get next map: %s%s", strerror(errno), 524 p_err("can't get next map: %s%s", strerror(errno),
525 errno == EINVAL ? " -- kernel too old?" : ""); 525 errno == EINVAL ? " -- kernel too old?" : "");
526 return -1; 526 break;
527 } 527 }
528 528
529 fd = bpf_map_get_fd_by_id(id); 529 fd = bpf_map_get_fd_by_id(id);
530 if (fd < 0) { 530 if (fd < 0) {
531 if (errno == ENOENT)
532 continue;
531 p_err("can't get map by id (%u): %s", 533 p_err("can't get map by id (%u): %s",
532 id, strerror(errno)); 534 id, strerror(errno));
533 return -1; 535 break;
534 } 536 }
535 537
536 err = bpf_obj_get_info_by_fd(fd, &info, &len); 538 err = bpf_obj_get_info_by_fd(fd, &info, &len);
537 if (err) { 539 if (err) {
538 p_err("can't get map info: %s", strerror(errno)); 540 p_err("can't get map info: %s", strerror(errno));
539 close(fd); 541 close(fd);
540 return -1; 542 break;
541 } 543 }
542 544
543 if (json_output) 545 if (json_output)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index ad619b96c276..dded77345bfb 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -382,6 +382,8 @@ static int do_show(int argc, char **argv)
382 382
383 fd = bpf_prog_get_fd_by_id(id); 383 fd = bpf_prog_get_fd_by_id(id);
384 if (fd < 0) { 384 if (fd < 0) {
385 if (errno == ENOENT)
386 continue;
385 p_err("can't get prog by id (%u): %s", 387 p_err("can't get prog by id (%u): %s",
386 id, strerror(errno)); 388 id, strerror(errno));
387 err = -1; 389 err = -1;
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 07fd03c74a77..04e32f965ad7 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -84,8 +84,6 @@
84 84
85#define uninitialized_var(x) x = *(&(x)) 85#define uninitialized_var(x) x = *(&(x))
86 86
87#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
88
89#include <linux/types.h> 87#include <linux/types.h>
90 88
91/* 89/*
@@ -135,20 +133,19 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
135/* 133/*
136 * Prevent the compiler from merging or refetching reads or writes. The 134 * Prevent the compiler from merging or refetching reads or writes. The
137 * compiler is also forbidden from reordering successive instances of 135 * compiler is also forbidden from reordering successive instances of
138 * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the 136 * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
139 * compiler is aware of some particular ordering. One way to make the 137 * particular ordering. One way to make the compiler aware of ordering is to
140 * compiler aware of ordering is to put the two invocations of READ_ONCE, 138 * put the two invocations of READ_ONCE or WRITE_ONCE in different C
141 * WRITE_ONCE or ACCESS_ONCE() in different C statements. 139 * statements.
142 * 140 *
143 * In contrast to ACCESS_ONCE these two macros will also work on aggregate 141 * These two macros will also work on aggregate data types like structs or
144 * data types like structs or unions. If the size of the accessed data 142 * unions. If the size of the accessed data type exceeds the word size of
145 * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) 143 * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will
146 * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a 144 * fall back to memcpy and print a compile-time warning.
147 * compile-time warning.
148 * 145 *
149 * Their two major use cases are: (1) Mediating communication between 146 * Their two major use cases are: (1) Mediating communication between
150 * process-level code and irq/NMI handlers, all running on the same CPU, 147 * process-level code and irq/NMI handlers, all running on the same CPU,
151 * and (2) Ensuring that the compiler does not fold, spindle, or otherwise 148 * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
152 * mutilate accesses that either do not require ordering or that interact 149 * mutilate accesses that either do not require ordering or that interact
153 * with an explicit memory barrier or atomic instruction that provides the 150 * with an explicit memory barrier or atomic instruction that provides the
154 * required ordering. 151 * required ordering.
diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h
index 940c1b075659..6b0c36a58fcb 100644
--- a/tools/include/linux/lockdep.h
+++ b/tools/include/linux/lockdep.h
@@ -48,6 +48,7 @@ static inline int debug_locks_off(void)
48#define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__) 48#define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__)
49#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) 49#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
50#define pr_warn pr_err 50#define pr_warn pr_err
51#define pr_cont pr_err
51 52
52#define list_del_rcu list_del 53#define list_del_rcu list_del
53 54
diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..13a58531e6fa
--- /dev/null
+++ b/tools/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,7 @@
1#if defined(__aarch64__)
2#include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h"
3#elif defined(__s390__)
4#include "../../arch/s390/include/uapi/asm/bpf_perf_event.h"
5#else
6#include <uapi/asm-generic/bpf_perf_event.h>
7#endif
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 282d7613fce8..496e59a2738b 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -630,9 +630,9 @@ struct kvm_s390_irq {
630 630
631struct kvm_s390_irq_state { 631struct kvm_s390_irq_state {
632 __u64 buf; 632 __u64 buf;
633 __u32 flags; 633 __u32 flags; /* will stay unused for compatibility reasons */
634 __u32 len; 634 __u32 len;
635 __u32 reserved[4]; 635 __u32 reserved[4]; /* will stay unused for compatibility reasons */
636}; 636};
637 637
638/* for KVM_SET_GUEST_DEBUG */ 638/* for KVM_SET_GUEST_DEBUG */
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 217cf6f95c36..a5684d0968b4 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -478,7 +478,7 @@ class Provider(object):
478 @staticmethod 478 @staticmethod
479 def is_field_wanted(fields_filter, field): 479 def is_field_wanted(fields_filter, field):
480 """Indicate whether field is valid according to fields_filter.""" 480 """Indicate whether field is valid according to fields_filter."""
481 if not fields_filter or fields_filter == "help": 481 if not fields_filter:
482 return True 482 return True
483 return re.match(fields_filter, field) is not None 483 return re.match(fields_filter, field) is not None
484 484
@@ -549,8 +549,8 @@ class TracepointProvider(Provider):
549 549
550 def update_fields(self, fields_filter): 550 def update_fields(self, fields_filter):
551 """Refresh fields, applying fields_filter""" 551 """Refresh fields, applying fields_filter"""
552 self._fields = [field for field in self.get_available_fields() 552 self.fields = [field for field in self.get_available_fields()
553 if self.is_field_wanted(fields_filter, field)] 553 if self.is_field_wanted(fields_filter, field)]
554 554
555 @staticmethod 555 @staticmethod
556 def get_online_cpus(): 556 def get_online_cpus():
@@ -950,7 +950,8 @@ class Tui(object):
950 curses.nocbreak() 950 curses.nocbreak()
951 curses.endwin() 951 curses.endwin()
952 952
953 def get_all_gnames(self): 953 @staticmethod
954 def get_all_gnames():
954 """Returns a list of (pid, gname) tuples of all running guests""" 955 """Returns a list of (pid, gname) tuples of all running guests"""
955 res = [] 956 res = []
956 try: 957 try:
@@ -963,7 +964,7 @@ class Tui(object):
963 # perform a sanity check before calling the more expensive 964 # perform a sanity check before calling the more expensive
964 # function to possibly extract the guest name 965 # function to possibly extract the guest name
965 if ' -name ' in line[1]: 966 if ' -name ' in line[1]:
966 res.append((line[0], self.get_gname_from_pid(line[0]))) 967 res.append((line[0], Tui.get_gname_from_pid(line[0])))
967 child.stdout.close() 968 child.stdout.close()
968 969
969 return res 970 return res
@@ -984,7 +985,8 @@ class Tui(object):
984 except Exception: 985 except Exception:
985 self.screen.addstr(row + 1, 2, 'Not available') 986 self.screen.addstr(row + 1, 2, 'Not available')
986 987
987 def get_pid_from_gname(self, gname): 988 @staticmethod
989 def get_pid_from_gname(gname):
988 """Fuzzy function to convert guest name to QEMU process pid. 990 """Fuzzy function to convert guest name to QEMU process pid.
989 991
990 Returns a list of potential pids, can be empty if no match found. 992 Returns a list of potential pids, can be empty if no match found.
@@ -992,7 +994,7 @@ class Tui(object):
992 994
993 """ 995 """
994 pids = [] 996 pids = []
995 for line in self.get_all_gnames(): 997 for line in Tui.get_all_gnames():
996 if gname == line[1]: 998 if gname == line[1]:
997 pids.append(int(line[0])) 999 pids.append(int(line[0]))
998 1000
@@ -1090,15 +1092,16 @@ class Tui(object):
1090 # sort by totals 1092 # sort by totals
1091 return (0, -stats[x][0]) 1093 return (0, -stats[x][0])
1092 total = 0. 1094 total = 0.
1093 for val in stats.values(): 1095 for key in stats.keys():
1094 total += val[0] 1096 if key.find('(') is -1:
1097 total += stats[key][0]
1095 if self._sorting == SORT_DEFAULT: 1098 if self._sorting == SORT_DEFAULT:
1096 sortkey = sortCurAvg 1099 sortkey = sortCurAvg
1097 else: 1100 else:
1098 sortkey = sortTotal 1101 sortkey = sortTotal
1102 tavg = 0
1099 for key in sorted(stats.keys(), key=sortkey): 1103 for key in sorted(stats.keys(), key=sortkey):
1100 1104 if row >= self.screen.getmaxyx()[0] - 1:
1101 if row >= self.screen.getmaxyx()[0]:
1102 break 1105 break
1103 values = stats[key] 1106 values = stats[key]
1104 if not values[0] and not values[1]: 1107 if not values[0] and not values[1]:
@@ -1110,9 +1113,15 @@ class Tui(object):
1110 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % 1113 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
1111 (key, values[0], values[0] * 100 / total, 1114 (key, values[0], values[0] * 100 / total,
1112 cur)) 1115 cur))
1116 if cur is not '' and key.find('(') is -1:
1117 tavg += cur
1113 row += 1 1118 row += 1
1114 if row == 3: 1119 if row == 3:
1115 self.screen.addstr(4, 1, 'No matching events reported yet') 1120 self.screen.addstr(4, 1, 'No matching events reported yet')
1121 else:
1122 self.screen.addstr(row, 1, '%-40s %10d %8s' %
1123 ('Total', total, tavg if tavg else ''),
1124 curses.A_BOLD)
1116 self.screen.refresh() 1125 self.screen.refresh()
1117 1126
1118 def show_msg(self, text): 1127 def show_msg(self, text):
@@ -1358,7 +1367,7 @@ class Tui(object):
1358 if char == 'x': 1367 if char == 'x':
1359 self.update_drilldown() 1368 self.update_drilldown()
1360 # prevents display of current values on next refresh 1369 # prevents display of current values on next refresh
1361 self.stats.get() 1370 self.stats.get(self._display_guests)
1362 except KeyboardInterrupt: 1371 except KeyboardInterrupt:
1363 break 1372 break
1364 except curses.error: 1373 except curses.error:
@@ -1451,16 +1460,13 @@ Press any other key to refresh statistics immediately.
1451 try: 1460 try:
1452 pids = Tui.get_pid_from_gname(val) 1461 pids = Tui.get_pid_from_gname(val)
1453 except: 1462 except:
1454 raise optparse.OptionValueError('Error while searching for guest ' 1463 sys.exit('Error while searching for guest "{}". Use "-p" to '
1455 '"{}", use "-p" to specify a pid ' 1464 'specify a pid instead?'.format(val))
1456 'instead'.format(val))
1457 if len(pids) == 0: 1465 if len(pids) == 0:
1458 raise optparse.OptionValueError('No guest by the name "{}" ' 1466 sys.exit('Error: No guest by the name "{}" found'.format(val))
1459 'found'.format(val))
1460 if len(pids) > 1: 1467 if len(pids) > 1:
1461 raise optparse.OptionValueError('Multiple processes found (pids: ' 1468 sys.exit('Error: Multiple processes found (pids: {}). Use "-p" '
1462 '{}) - use "-p" to specify a pid ' 1469 'to specify the desired pid'.format(" ".join(pids)))
1463 'instead'.format(" ".join(pids)))
1464 parser.values.pid = pids[0] 1470 parser.values.pid = pids[0]
1465 1471
1466 optparser = optparse.OptionParser(description=description_text, 1472 optparser = optparse.OptionParser(description=description_text,
@@ -1518,7 +1524,16 @@ Press any other key to refresh statistics immediately.
1518 help='restrict statistics to guest by name', 1524 help='restrict statistics to guest by name',
1519 callback=cb_guest_to_pid, 1525 callback=cb_guest_to_pid,
1520 ) 1526 )
1521 (options, _) = optparser.parse_args(sys.argv) 1527 options, unkn = optparser.parse_args(sys.argv)
1528 if len(unkn) != 1:
1529 sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:]))
1530 try:
1531 # verify that we were passed a valid regex up front
1532 re.compile(options.fields)
1533 except re.error:
1534 sys.exit('Error: "' + options.fields + '" is not a valid regular '
1535 'expression')
1536
1522 return options 1537 return options
1523 1538
1524 1539
@@ -1564,16 +1579,13 @@ def main():
1564 1579
1565 stats = Stats(options) 1580 stats = Stats(options)
1566 1581
1567 if options.fields == "help": 1582 if options.fields == 'help':
1568 event_list = "\n" 1583 stats.fields_filter = None
1569 s = stats.get() 1584 event_list = []
1570 for key in s.keys(): 1585 for key in stats.get().keys():
1571 if key.find('(') != -1: 1586 event_list.append(key.split('(', 1)[0])
1572 key = key[0:key.find('(')] 1587 sys.stdout.write(' ' + '\n '.join(sorted(set(event_list))) + '\n')
1573 if event_list.find('\n' + key + '\n') == -1: 1588 sys.exit(0)
1574 event_list += key + '\n'
1575 sys.stdout.write(event_list)
1576 return ""
1577 1589
1578 if options.log: 1590 if options.log:
1579 log(stats) 1591 log(stats)
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index e5cf836be8a1..b5b3810c9e94 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -50,6 +50,8 @@ INTERACTIVE COMMANDS
50*s*:: set update interval 50*s*:: set update interval
51 51
52*x*:: toggle reporting of stats for child trace events 52*x*:: toggle reporting of stats for child trace events
53 :: *Note*: The stats for the parents summarize the respective child trace
54 events
53 55
54Press any other key to refresh statistics immediately. 56Press any other key to refresh statistics immediately.
55 57
@@ -86,7 +88,7 @@ OPTIONS
86 88
87-f<fields>:: 89-f<fields>::
88--fields=<fields>:: 90--fields=<fields>::
89 fields to display (regex) 91 fields to display (regex), "-f help" for a list of available events
90 92
91-h:: 93-h::
92--help:: 94--help::
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 8acfc47af70e..540a209b78ab 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -138,7 +138,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
138 *type = INSN_STACK; 138 *type = INSN_STACK;
139 op->src.type = OP_SRC_ADD; 139 op->src.type = OP_SRC_ADD;
140 op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; 140 op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
141 op->dest.type = OP_SRC_REG; 141 op->dest.type = OP_DEST_REG;
142 op->dest.reg = CFI_SP; 142 op->dest.reg = CFI_SP;
143 } 143 }
144 break; 144 break;
diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
index 12e377184ee4..e0b85930dd77 100644
--- a/tools/objtool/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
607fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) 607fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
608fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) 608fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
609fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) 609fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
610ff: 610ff: UD0
611EndTable 611EndTable
612 612
613Table: 3-byte opcode 1 (0x0f 0x38) 613Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
7177e: vpermt2d/q Vx,Hx,Wx (66),(ev) 7177e: vpermt2d/q Vx,Hx,Wx (66),(ev)
7187f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 7187f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
71980: INVEPT Gy,Mdq (66) 71980: INVEPT Gy,Mdq (66)
72081: INVPID Gy,Mdq (66) 72081: INVVPID Gy,Mdq (66)
72182: INVPCID Gy,Mdq (66) 72182: INVPCID Gy,Mdq (66)
72283: vpmultishiftqb Vx,Hx,Wx (66),(ev) 72283: vpmultishiftqb Vx,Hx,Wx (66),(ev)
72388: vexpandps/d Vpd,Wpd (66),(ev) 72388: vexpandps/d Vpd,Wpd (66),(ev)
@@ -896,7 +896,7 @@ EndTable
896 896
897GrpTable: Grp3_1 897GrpTable: Grp3_1
8980: TEST Eb,Ib 8980: TEST Eb,Ib
8991: 8991: TEST Eb,Ib
9002: NOT Eb 9002: NOT Eb
9013: NEG Eb 9013: NEG Eb
9024: MUL AL,Eb 9024: MUL AL,Eb
@@ -970,6 +970,15 @@ GrpTable: Grp9
970EndTable 970EndTable
971 971
972GrpTable: Grp10 972GrpTable: Grp10
973# all are UD1
9740: UD1
9751: UD1
9762: UD1
9773: UD1
9784: UD1
9795: UD1
9806: UD1
9817: UD1
973EndTable 982EndTable
974 983
975# Grp11A and Grp11B are expressed as Grp11 in Intel SDM 984# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 4c6b5c9ef073..91e8e19ff5e0 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -44,6 +44,9 @@ int cmd_orc(int argc, const char **argv)
44 const char *objname; 44 const char *objname;
45 45
46 argc--; argv++; 46 argc--; argv++;
47 if (argc <= 0)
48 usage_with_options(orc_usage, check_options);
49
47 if (!strncmp(argv[0], "gen", 3)) { 50 if (!strncmp(argv[0], "gen", 3)) {
48 argc = parse_options(argc, argv, check_options, orc_usage, 0); 51 argc = parse_options(argc, argv, check_options, orc_usage, 0);
49 if (argc != 1) 52 if (argc != 1)
@@ -52,7 +55,6 @@ int cmd_orc(int argc, const char **argv)
52 objname = argv[0]; 55 objname = argv[0];
53 56
54 return check(objname, no_fp, no_unreachable, true); 57 return check(objname, no_fp, no_unreachable, true);
55
56 } 58 }
57 59
58 if (!strcmp(argv[0], "dump")) { 60 if (!strcmp(argv[0], "dump")) {
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index e5ca31429c9b..e61fe703197b 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -165,6 +165,8 @@ int create_orc_sections(struct objtool_file *file)
165 165
166 /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ 166 /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
167 sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx); 167 sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
168 if (!sec)
169 return -1;
168 170
169 ip_relasec = elf_create_rela_section(file->elf, sec); 171 ip_relasec = elf_create_rela_section(file->elf, sec);
170 if (!ip_relasec) 172 if (!ip_relasec)
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index ed65e82f034e..0294bfb6c5f8 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -188,9 +188,7 @@ ifdef PYTHON_CONFIG
188 PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) 188 PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
189 PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil 189 PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
190 PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) 190 PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
191 ifeq ($(CC_NO_CLANG), 1) 191 PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
192 PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
193 endif
194 FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) 192 FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
195endif 193endif
196 194
@@ -576,14 +574,15 @@ ifndef NO_GTK2
576 endif 574 endif
577endif 575endif
578 576
579
580ifdef NO_LIBPERL 577ifdef NO_LIBPERL
581 CFLAGS += -DNO_LIBPERL 578 CFLAGS += -DNO_LIBPERL
582else 579else
583 PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) 580 PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
584 PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) 581 PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
585 PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) 582 PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
586 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` 583 PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
584 PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
585 PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
587 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) 586 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
588 587
589 ifneq ($(feature-libperl), 1) 588 ifneq ($(feature-libperl), 1)
diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h
index d2df54a6bc5a..bcfbaed78cc2 100644
--- a/tools/perf/arch/s390/include/perf_regs.h
+++ b/tools/perf/arch/s390/include/perf_regs.h
@@ -3,7 +3,7 @@
3 3
4#include <stdlib.h> 4#include <stdlib.h>
5#include <linux/types.h> 5#include <linux/types.h>
6#include <../../../../arch/s390/include/uapi/asm/perf_regs.h> 6#include <asm/perf_regs.h>
7 7
8void perf_regs_load(u64 *regs); 8void perf_regs_load(u64 *regs);
9 9
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 6db9d809fe97..3e64f10b6d66 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -21,6 +21,7 @@ arch/x86/include/asm/cpufeatures.h
21arch/arm/include/uapi/asm/perf_regs.h 21arch/arm/include/uapi/asm/perf_regs.h
22arch/arm64/include/uapi/asm/perf_regs.h 22arch/arm64/include/uapi/asm/perf_regs.h
23arch/powerpc/include/uapi/asm/perf_regs.h 23arch/powerpc/include/uapi/asm/perf_regs.h
24arch/s390/include/uapi/asm/perf_regs.h
24arch/x86/include/uapi/asm/perf_regs.h 25arch/x86/include/uapi/asm/perf_regs.h
25arch/x86/include/uapi/asm/kvm.h 26arch/x86/include/uapi/asm/kvm.h
26arch/x86/include/uapi/asm/kvm_perf.h 27arch/x86/include/uapi/asm/kvm_perf.h
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index cf36de7ea255..0c6d1002b524 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -384,13 +384,13 @@ jvmti_write_code(void *agent, char const *sym,
384} 384}
385 385
386int 386int
387jvmti_write_debug_info(void *agent, uint64_t code, const char *file, 387jvmti_write_debug_info(void *agent, uint64_t code,
388 jvmti_line_info_t *li, int nr_lines) 388 int nr_lines, jvmti_line_info_t *li,
389 const char * const * file_names)
389{ 390{
390 struct jr_code_debug_info rec; 391 struct jr_code_debug_info rec;
391 size_t sret, len, size, flen; 392 size_t sret, len, size, flen = 0;
392 uint64_t addr; 393 uint64_t addr;
393 const char *fn = file;
394 FILE *fp = agent; 394 FILE *fp = agent;
395 int i; 395 int i;
396 396
@@ -405,7 +405,9 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
405 return -1; 405 return -1;
406 } 406 }
407 407
408 flen = strlen(file) + 1; 408 for (i = 0; i < nr_lines; ++i) {
409 flen += strlen(file_names[i]) + 1;
410 }
409 411
410 rec.p.id = JIT_CODE_DEBUG_INFO; 412 rec.p.id = JIT_CODE_DEBUG_INFO;
411 size = sizeof(rec); 413 size = sizeof(rec);
@@ -421,7 +423,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
421 * file[] : source file name 423 * file[] : source file name
422 */ 424 */
423 size += nr_lines * sizeof(struct debug_entry); 425 size += nr_lines * sizeof(struct debug_entry);
424 size += flen * nr_lines; 426 size += flen;
425 rec.p.total_size = size; 427 rec.p.total_size = size;
426 428
427 /* 429 /*
@@ -452,7 +454,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
452 if (sret != 1) 454 if (sret != 1)
453 goto error; 455 goto error;
454 456
455 sret = fwrite_unlocked(fn, flen, 1, fp); 457 sret = fwrite_unlocked(file_names[i], strlen(file_names[i]) + 1, 1, fp);
456 if (sret != 1) 458 if (sret != 1)
457 goto error; 459 goto error;
458 } 460 }
diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h
index fe32d8344a82..6ed82f6c06dd 100644
--- a/tools/perf/jvmti/jvmti_agent.h
+++ b/tools/perf/jvmti/jvmti_agent.h
@@ -14,6 +14,7 @@ typedef struct {
14 unsigned long pc; 14 unsigned long pc;
15 int line_number; 15 int line_number;
16 int discrim; /* discriminator -- 0 for now */ 16 int discrim; /* discriminator -- 0 for now */
17 jmethodID methodID;
17} jvmti_line_info_t; 18} jvmti_line_info_t;
18 19
19void *jvmti_open(void); 20void *jvmti_open(void);
@@ -22,11 +23,9 @@ int jvmti_write_code(void *agent, char const *symbol_name,
22 uint64_t vma, void const *code, 23 uint64_t vma, void const *code,
23 const unsigned int code_size); 24 const unsigned int code_size);
24 25
25int jvmti_write_debug_info(void *agent, 26int jvmti_write_debug_info(void *agent, uint64_t code, int nr_lines,
26 uint64_t code,
27 const char *file,
28 jvmti_line_info_t *li, 27 jvmti_line_info_t *li,
29 int nr_lines); 28 const char * const * file_names);
30 29
31#if defined(__cplusplus) 30#if defined(__cplusplus)
32} 31}
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
index c62c9fc9a525..6add3e982614 100644
--- a/tools/perf/jvmti/libjvmti.c
+++ b/tools/perf/jvmti/libjvmti.c
@@ -47,6 +47,7 @@ do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
47 tab[lines].pc = (unsigned long)pc; 47 tab[lines].pc = (unsigned long)pc;
48 tab[lines].line_number = loc_tab[i].line_number; 48 tab[lines].line_number = loc_tab[i].line_number;
49 tab[lines].discrim = 0; /* not yet used */ 49 tab[lines].discrim = 0; /* not yet used */
50 tab[lines].methodID = m;
50 lines++; 51 lines++;
51 } else { 52 } else {
52 break; 53 break;
@@ -125,6 +126,99 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
125 return JVMTI_ERROR_NONE; 126 return JVMTI_ERROR_NONE;
126} 127}
127 128
129static void
130copy_class_filename(const char * class_sign, const char * file_name, char * result, size_t max_length)
131{
132 /*
133 * Assume path name is class hierarchy, this is a common practice with Java programs
134 */
135 if (*class_sign == 'L') {
136 int j, i = 0;
137 char *p = strrchr(class_sign, '/');
138 if (p) {
139 /* drop the 'L' prefix and copy up to the final '/' */
140 for (i = 0; i < (p - class_sign); i++)
141 result[i] = class_sign[i+1];
142 }
143 /*
144 * append file name, we use loops and not string ops to avoid modifying
145 * class_sign which is used later for the symbol name
146 */
147 for (j = 0; i < (max_length - 1) && file_name && j < strlen(file_name); j++, i++)
148 result[i] = file_name[j];
149
150 result[i] = '\0';
151 } else {
152 /* fallback case */
153 size_t file_name_len = strlen(file_name);
154 strncpy(result, file_name, file_name_len < max_length ? file_name_len : max_length);
155 }
156}
157
158static jvmtiError
159get_source_filename(jvmtiEnv *jvmti, jmethodID methodID, char ** buffer)
160{
161 jvmtiError ret;
162 jclass decl_class;
163 char *file_name = NULL;
164 char *class_sign = NULL;
165 char fn[PATH_MAX];
166 size_t len;
167
168 ret = (*jvmti)->GetMethodDeclaringClass(jvmti, methodID, &decl_class);
169 if (ret != JVMTI_ERROR_NONE) {
170 print_error(jvmti, "GetMethodDeclaringClass", ret);
171 return ret;
172 }
173
174 ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
175 if (ret != JVMTI_ERROR_NONE) {
176 print_error(jvmti, "GetSourceFileName", ret);
177 return ret;
178 }
179
180 ret = (*jvmti)->GetClassSignature(jvmti, decl_class, &class_sign, NULL);
181 if (ret != JVMTI_ERROR_NONE) {
182 print_error(jvmti, "GetClassSignature", ret);
183 goto free_file_name_error;
184 }
185
186 copy_class_filename(class_sign, file_name, fn, PATH_MAX);
187 len = strlen(fn);
188 *buffer = malloc((len + 1) * sizeof(char));
189 if (!*buffer) {
190 print_error(jvmti, "GetClassSignature", ret);
191 ret = JVMTI_ERROR_OUT_OF_MEMORY;
192 goto free_class_sign_error;
193 }
194 strcpy(*buffer, fn);
195 ret = JVMTI_ERROR_NONE;
196
197free_class_sign_error:
198 (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
199free_file_name_error:
200 (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
201
202 return ret;
203}
204
205static jvmtiError
206fill_source_filenames(jvmtiEnv *jvmti, int nr_lines,
207 const jvmti_line_info_t * line_tab,
208 char ** file_names)
209{
210 int index;
211 jvmtiError ret;
212
213 for (index = 0; index < nr_lines; ++index) {
214 ret = get_source_filename(jvmti, line_tab[index].methodID, &(file_names[index]));
215 if (ret != JVMTI_ERROR_NONE)
216 return ret;
217 }
218
219 return JVMTI_ERROR_NONE;
220}
221
128static void JNICALL 222static void JNICALL
129compiled_method_load_cb(jvmtiEnv *jvmti, 223compiled_method_load_cb(jvmtiEnv *jvmti,
130 jmethodID method, 224 jmethodID method,
@@ -135,16 +229,18 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
135 const void *compile_info) 229 const void *compile_info)
136{ 230{
137 jvmti_line_info_t *line_tab = NULL; 231 jvmti_line_info_t *line_tab = NULL;
232 char ** line_file_names = NULL;
138 jclass decl_class; 233 jclass decl_class;
139 char *class_sign = NULL; 234 char *class_sign = NULL;
140 char *func_name = NULL; 235 char *func_name = NULL;
141 char *func_sign = NULL; 236 char *func_sign = NULL;
142 char *file_name= NULL; 237 char *file_name = NULL;
143 char fn[PATH_MAX]; 238 char fn[PATH_MAX];
144 uint64_t addr = (uint64_t)(uintptr_t)code_addr; 239 uint64_t addr = (uint64_t)(uintptr_t)code_addr;
145 jvmtiError ret; 240 jvmtiError ret;
146 int nr_lines = 0; /* in line_tab[] */ 241 int nr_lines = 0; /* in line_tab[] */
147 size_t len; 242 size_t len;
243 int output_debug_info = 0;
148 244
149 ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method, 245 ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method,
150 &decl_class); 246 &decl_class);
@@ -158,6 +254,19 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
158 if (ret != JVMTI_ERROR_NONE) { 254 if (ret != JVMTI_ERROR_NONE) {
159 warnx("jvmti: cannot get line table for method"); 255 warnx("jvmti: cannot get line table for method");
160 nr_lines = 0; 256 nr_lines = 0;
257 } else if (nr_lines > 0) {
258 line_file_names = malloc(sizeof(char*) * nr_lines);
259 if (!line_file_names) {
260 warnx("jvmti: cannot allocate space for line table method names");
261 } else {
262 memset(line_file_names, 0, sizeof(char*) * nr_lines);
263 ret = fill_source_filenames(jvmti, nr_lines, line_tab, line_file_names);
264 if (ret != JVMTI_ERROR_NONE) {
265 warnx("jvmti: fill_source_filenames failed");
266 } else {
267 output_debug_info = 1;
268 }
269 }
161 } 270 }
162 } 271 }
163 272
@@ -181,33 +290,14 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
181 goto error; 290 goto error;
182 } 291 }
183 292
184 /* 293 copy_class_filename(class_sign, file_name, fn, PATH_MAX);
185 * Assume path name is class hierarchy, this is a common practice with Java programs 294
186 */
187 if (*class_sign == 'L') {
188 int j, i = 0;
189 char *p = strrchr(class_sign, '/');
190 if (p) {
191 /* drop the 'L' prefix and copy up to the final '/' */
192 for (i = 0; i < (p - class_sign); i++)
193 fn[i] = class_sign[i+1];
194 }
195 /*
196 * append file name, we use loops and not string ops to avoid modifying
197 * class_sign which is used later for the symbol name
198 */
199 for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++)
200 fn[i] = file_name[j];
201 fn[i] = '\0';
202 } else {
203 /* fallback case */
204 strcpy(fn, file_name);
205 }
206 /* 295 /*
207 * write source line info record if we have it 296 * write source line info record if we have it
208 */ 297 */
209 if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines)) 298 if (output_debug_info)
210 warnx("jvmti: write_debug_info() failed"); 299 if (jvmti_write_debug_info(jvmti_agent, addr, nr_lines, line_tab, (const char * const *) line_file_names))
300 warnx("jvmti: write_debug_info() failed");
211 301
212 len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2; 302 len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2;
213 { 303 {
@@ -223,6 +313,13 @@ error:
223 (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); 313 (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
224 (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); 314 (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
225 free(line_tab); 315 free(line_tab);
316 while (line_file_names && (nr_lines > 0)) {
317 if (line_file_names[nr_lines - 1]) {
318 free(line_file_names[nr_lines - 1]);
319 }
320 nr_lines -= 1;
321 }
322 free(line_file_names);
226} 323}
227 324
228static void JNICALL 325static void JNICALL
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
index c4d55919fac1..e0b85930dd77 100644
--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
607fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) 607fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
608fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) 608fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
609fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) 609fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
610ff: 610ff: UD0
611EndTable 611EndTable
612 612
613Table: 3-byte opcode 1 (0x0f 0x38) 613Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
7177e: vpermt2d/q Vx,Hx,Wx (66),(ev) 7177e: vpermt2d/q Vx,Hx,Wx (66),(ev)
7187f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 7187f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
71980: INVEPT Gy,Mdq (66) 71980: INVEPT Gy,Mdq (66)
72081: INVPID Gy,Mdq (66) 72081: INVVPID Gy,Mdq (66)
72182: INVPCID Gy,Mdq (66) 72182: INVPCID Gy,Mdq (66)
72283: vpmultishiftqb Vx,Hx,Wx (66),(ev) 72283: vpmultishiftqb Vx,Hx,Wx (66),(ev)
72388: vexpandps/d Vpd,Wpd (66),(ev) 72388: vexpandps/d Vpd,Wpd (66),(ev)
@@ -970,6 +970,15 @@ GrpTable: Grp9
970EndTable 970EndTable
971 971
972GrpTable: Grp10 972GrpTable: Grp10
973# all are UD1
9740: UD1
9751: UD1
9762: UD1
9773: UD1
9784: UD1
9795: UD1
9806: UD1
9817: UD1
973EndTable 982EndTable
974 983
975# Grp11A and Grp11B are expressed as Grp11 in Intel SDM 984# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index efd78b827b05..3a5cb5a6e94a 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -70,7 +70,7 @@ void perf_mmap__read_catchup(struct perf_mmap *md);
70static inline u64 perf_mmap__read_head(struct perf_mmap *mm) 70static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
71{ 71{
72 struct perf_event_mmap_page *pc = mm->base; 72 struct perf_event_mmap_page *pc = mm->base;
73 u64 head = ACCESS_ONCE(pc->data_head); 73 u64 head = READ_ONCE(pc->data_head);
74 rmb(); 74 rmb();
75 return head; 75 return head;
76} 76}
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 21a2d76b67dc..9316e648a880 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,19 +1,8 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2 2
3ifeq ($(srctree),)
4srctree := $(patsubst %/,%,$(dir $(CURDIR)))
5srctree := $(patsubst %/,%,$(dir $(srctree)))
6srctree := $(patsubst %/,%,$(dir $(srctree)))
7srctree := $(patsubst %/,%,$(dir $(srctree)))
8endif
9include $(srctree)/tools/scripts/Makefile.arch
10
11$(call detected_var,SRCARCH)
12
13LIBDIR := ../../../lib 3LIBDIR := ../../../lib
14BPFDIR := $(LIBDIR)/bpf 4BPFDIR := $(LIBDIR)/bpf
15APIDIR := ../../../include/uapi 5APIDIR := ../../../include/uapi
16ASMDIR:= ../../../arch/$(ARCH)/include/uapi
17GENDIR := ../../../../include/generated 6GENDIR := ../../../../include/generated
18GENHDR := $(GENDIR)/autoconf.h 7GENHDR := $(GENDIR)/autoconf.h
19 8
@@ -21,8 +10,8 @@ ifneq ($(wildcard $(GENHDR)),)
21 GENFLAGS := -DHAVE_GENHDR 10 GENFLAGS := -DHAVE_GENHDR
22endif 11endif
23 12
24CFLAGS += -Wall -O2 -I$(APIDIR) -I$(ASMDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include 13CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
25LDLIBS += -lcap -lelf 14LDLIBS += -lcap -lelf -lrt
26 15
27TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ 16TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
28 test_align test_verifier_log test_dev_cgroup 17 test_align test_verifier_log test_dev_cgroup
@@ -50,7 +39,7 @@ $(BPFOBJ): force
50CLANG ?= clang 39CLANG ?= clang
51LLC ?= llc 40LLC ?= llc
52 41
53PROBE := $(shell llc -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) 42PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
54 43
55# Let newer LLVM versions transparently probe the kernel for availability 44# Let newer LLVM versions transparently probe the kernel for availability
56# of full BPF instruction set. 45# of full BPF instruction set.
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 69427531408d..6761be18a91f 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -351,7 +351,7 @@ static void test_bpf_obj_id(void)
351 info_len != sizeof(struct bpf_map_info) || 351 info_len != sizeof(struct bpf_map_info) ||
352 strcmp((char *)map_infos[i].name, expected_map_name), 352 strcmp((char *)map_infos[i].name, expected_map_name),
353 "get-map-info(fd)", 353 "get-map-info(fd)",
354 "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", 354 "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
355 err, errno, 355 err, errno,
356 map_infos[i].type, BPF_MAP_TYPE_ARRAY, 356 map_infos[i].type, BPF_MAP_TYPE_ARRAY,
357 info_len, sizeof(struct bpf_map_info), 357 info_len, sizeof(struct bpf_map_info),
@@ -395,7 +395,7 @@ static void test_bpf_obj_id(void)
395 *(int *)prog_infos[i].map_ids != map_infos[i].id || 395 *(int *)prog_infos[i].map_ids != map_infos[i].id ||
396 strcmp((char *)prog_infos[i].name, expected_prog_name), 396 strcmp((char *)prog_infos[i].name, expected_prog_name),
397 "get-prog-info(fd)", 397 "get-prog-info(fd)",
398 "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", 398 "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
399 err, errno, i, 399 err, errno, i,
400 prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, 400 prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
401 info_len, sizeof(struct bpf_prog_info), 401 info_len, sizeof(struct bpf_prog_info),
@@ -463,7 +463,7 @@ static void test_bpf_obj_id(void)
463 memcmp(&prog_info, &prog_infos[i], info_len) || 463 memcmp(&prog_info, &prog_infos[i], info_len) ||
464 *(int *)prog_info.map_ids != saved_map_id, 464 *(int *)prog_info.map_ids != saved_map_id,
465 "get-prog-info(next_id->fd)", 465 "get-prog-info(next_id->fd)",
466 "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n", 466 "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
467 err, errno, info_len, sizeof(struct bpf_prog_info), 467 err, errno, info_len, sizeof(struct bpf_prog_info),
468 memcmp(&prog_info, &prog_infos[i], info_len), 468 memcmp(&prog_info, &prog_infos[i], info_len),
469 *(int *)prog_info.map_ids, saved_map_id); 469 *(int *)prog_info.map_ids, saved_map_id);
@@ -509,7 +509,7 @@ static void test_bpf_obj_id(void)
509 memcmp(&map_info, &map_infos[i], info_len) || 509 memcmp(&map_info, &map_infos[i], info_len) ||
510 array_value != array_magic_value, 510 array_value != array_magic_value,
511 "check get-map-info(next_id->fd)", 511 "check get-map-info(next_id->fd)",
512 "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n", 512 "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
513 err, errno, info_len, sizeof(struct bpf_map_info), 513 err, errno, info_len, sizeof(struct bpf_map_info),
514 memcmp(&map_info, &map_infos[i], info_len), 514 memcmp(&map_info, &map_infos[i], info_len),
515 array_value, array_magic_value); 515 array_value, array_magic_value);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3c64f30cf63c..b51017404c62 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -422,9 +422,7 @@ static struct bpf_test tests[] = {
422 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), 422 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
423 BPF_EXIT_INSN(), 423 BPF_EXIT_INSN(),
424 }, 424 },
425 .errstr_unpriv = "R1 subtraction from stack pointer", 425 .errstr = "R1 subtraction from stack pointer",
426 .result_unpriv = REJECT,
427 .errstr = "R1 invalid mem access",
428 .result = REJECT, 426 .result = REJECT,
429 }, 427 },
430 { 428 {
@@ -606,7 +604,6 @@ static struct bpf_test tests[] = {
606 }, 604 },
607 .errstr = "misaligned stack access", 605 .errstr = "misaligned stack access",
608 .result = REJECT, 606 .result = REJECT,
609 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
610 }, 607 },
611 { 608 {
612 "invalid map_fd for function call", 609 "invalid map_fd for function call",
@@ -1797,7 +1794,6 @@ static struct bpf_test tests[] = {
1797 }, 1794 },
1798 .result = REJECT, 1795 .result = REJECT,
1799 .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8", 1796 .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
1800 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
1801 }, 1797 },
1802 { 1798 {
1803 "PTR_TO_STACK store/load - bad alignment on reg", 1799 "PTR_TO_STACK store/load - bad alignment on reg",
@@ -1810,7 +1806,6 @@ static struct bpf_test tests[] = {
1810 }, 1806 },
1811 .result = REJECT, 1807 .result = REJECT,
1812 .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8", 1808 .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
1813 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
1814 }, 1809 },
1815 { 1810 {
1816 "PTR_TO_STACK store/load - out of bounds low", 1811 "PTR_TO_STACK store/load - out of bounds low",
@@ -1862,9 +1857,8 @@ static struct bpf_test tests[] = {
1862 BPF_MOV64_IMM(BPF_REG_0, 0), 1857 BPF_MOV64_IMM(BPF_REG_0, 0),
1863 BPF_EXIT_INSN(), 1858 BPF_EXIT_INSN(),
1864 }, 1859 },
1865 .result = ACCEPT, 1860 .result = REJECT,
1866 .result_unpriv = REJECT, 1861 .errstr = "R1 pointer += pointer",
1867 .errstr_unpriv = "R1 pointer += pointer",
1868 }, 1862 },
1869 { 1863 {
1870 "unpriv: neg pointer", 1864 "unpriv: neg pointer",
@@ -2592,7 +2586,8 @@ static struct bpf_test tests[] = {
2592 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 2586 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
2593 offsetof(struct __sk_buff, data)), 2587 offsetof(struct __sk_buff, data)),
2594 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4), 2588 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
2595 BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), 2589 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
2590 offsetof(struct __sk_buff, len)),
2596 BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49), 2591 BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
2597 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49), 2592 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
2598 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), 2593 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
@@ -2899,7 +2894,7 @@ static struct bpf_test tests[] = {
2899 BPF_MOV64_IMM(BPF_REG_0, 0), 2894 BPF_MOV64_IMM(BPF_REG_0, 0),
2900 BPF_EXIT_INSN(), 2895 BPF_EXIT_INSN(),
2901 }, 2896 },
2902 .errstr = "invalid access to packet", 2897 .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
2903 .result = REJECT, 2898 .result = REJECT,
2904 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 2899 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
2905 }, 2900 },
@@ -3885,9 +3880,7 @@ static struct bpf_test tests[] = {
3885 BPF_EXIT_INSN(), 3880 BPF_EXIT_INSN(),
3886 }, 3881 },
3887 .fixup_map2 = { 3, 11 }, 3882 .fixup_map2 = { 3, 11 },
3888 .errstr_unpriv = "R0 pointer += pointer", 3883 .errstr = "R0 pointer += pointer",
3889 .errstr = "R0 invalid mem access 'inv'",
3890 .result_unpriv = REJECT,
3891 .result = REJECT, 3884 .result = REJECT,
3892 .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, 3885 .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
3893 }, 3886 },
@@ -3928,7 +3921,7 @@ static struct bpf_test tests[] = {
3928 BPF_EXIT_INSN(), 3921 BPF_EXIT_INSN(),
3929 }, 3922 },
3930 .fixup_map1 = { 4 }, 3923 .fixup_map1 = { 4 },
3931 .errstr = "R4 invalid mem access", 3924 .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
3932 .result = REJECT, 3925 .result = REJECT,
3933 .prog_type = BPF_PROG_TYPE_SCHED_CLS 3926 .prog_type = BPF_PROG_TYPE_SCHED_CLS
3934 }, 3927 },
@@ -3949,7 +3942,7 @@ static struct bpf_test tests[] = {
3949 BPF_EXIT_INSN(), 3942 BPF_EXIT_INSN(),
3950 }, 3943 },
3951 .fixup_map1 = { 4 }, 3944 .fixup_map1 = { 4 },
3952 .errstr = "R4 invalid mem access", 3945 .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
3953 .result = REJECT, 3946 .result = REJECT,
3954 .prog_type = BPF_PROG_TYPE_SCHED_CLS 3947 .prog_type = BPF_PROG_TYPE_SCHED_CLS
3955 }, 3948 },
@@ -3970,7 +3963,7 @@ static struct bpf_test tests[] = {
3970 BPF_EXIT_INSN(), 3963 BPF_EXIT_INSN(),
3971 }, 3964 },
3972 .fixup_map1 = { 4 }, 3965 .fixup_map1 = { 4 },
3973 .errstr = "R4 invalid mem access", 3966 .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
3974 .result = REJECT, 3967 .result = REJECT,
3975 .prog_type = BPF_PROG_TYPE_SCHED_CLS 3968 .prog_type = BPF_PROG_TYPE_SCHED_CLS
3976 }, 3969 },
@@ -5195,10 +5188,8 @@ static struct bpf_test tests[] = {
5195 BPF_EXIT_INSN(), 5188 BPF_EXIT_INSN(),
5196 }, 5189 },
5197 .fixup_map2 = { 3 }, 5190 .fixup_map2 = { 3 },
5198 .errstr_unpriv = "R0 bitwise operator &= on pointer", 5191 .errstr = "R0 bitwise operator &= on pointer",
5199 .errstr = "invalid mem access 'inv'",
5200 .result = REJECT, 5192 .result = REJECT,
5201 .result_unpriv = REJECT,
5202 }, 5193 },
5203 { 5194 {
5204 "map element value illegal alu op, 2", 5195 "map element value illegal alu op, 2",
@@ -5214,10 +5205,8 @@ static struct bpf_test tests[] = {
5214 BPF_EXIT_INSN(), 5205 BPF_EXIT_INSN(),
5215 }, 5206 },
5216 .fixup_map2 = { 3 }, 5207 .fixup_map2 = { 3 },
5217 .errstr_unpriv = "R0 32-bit pointer arithmetic prohibited", 5208 .errstr = "R0 32-bit pointer arithmetic prohibited",
5218 .errstr = "invalid mem access 'inv'",
5219 .result = REJECT, 5209 .result = REJECT,
5220 .result_unpriv = REJECT,
5221 }, 5210 },
5222 { 5211 {
5223 "map element value illegal alu op, 3", 5212 "map element value illegal alu op, 3",
@@ -5233,10 +5222,8 @@ static struct bpf_test tests[] = {
5233 BPF_EXIT_INSN(), 5222 BPF_EXIT_INSN(),
5234 }, 5223 },
5235 .fixup_map2 = { 3 }, 5224 .fixup_map2 = { 3 },
5236 .errstr_unpriv = "R0 pointer arithmetic with /= operator", 5225 .errstr = "R0 pointer arithmetic with /= operator",
5237 .errstr = "invalid mem access 'inv'",
5238 .result = REJECT, 5226 .result = REJECT,
5239 .result_unpriv = REJECT,
5240 }, 5227 },
5241 { 5228 {
5242 "map element value illegal alu op, 4", 5229 "map element value illegal alu op, 4",
@@ -6019,8 +6006,7 @@ static struct bpf_test tests[] = {
6019 BPF_EXIT_INSN(), 6006 BPF_EXIT_INSN(),
6020 }, 6007 },
6021 .fixup_map_in_map = { 3 }, 6008 .fixup_map_in_map = { 3 },
6022 .errstr = "R1 type=inv expected=map_ptr", 6009 .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
6023 .errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
6024 .result = REJECT, 6010 .result = REJECT,
6025 }, 6011 },
6026 { 6012 {
@@ -6117,6 +6103,30 @@ static struct bpf_test tests[] = {
6117 .result = ACCEPT, 6103 .result = ACCEPT,
6118 }, 6104 },
6119 { 6105 {
6106 "ld_abs: tests on r6 and skb data reload helper",
6107 .insns = {
6108 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
6109 BPF_LD_ABS(BPF_B, 0),
6110 BPF_LD_ABS(BPF_H, 0),
6111 BPF_LD_ABS(BPF_W, 0),
6112 BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
6113 BPF_MOV64_IMM(BPF_REG_6, 0),
6114 BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
6115 BPF_MOV64_IMM(BPF_REG_2, 1),
6116 BPF_MOV64_IMM(BPF_REG_3, 2),
6117 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6118 BPF_FUNC_skb_vlan_push),
6119 BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
6120 BPF_LD_ABS(BPF_B, 0),
6121 BPF_LD_ABS(BPF_H, 0),
6122 BPF_LD_ABS(BPF_W, 0),
6123 BPF_MOV64_IMM(BPF_REG_0, 42),
6124 BPF_EXIT_INSN(),
6125 },
6126 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
6127 .result = ACCEPT,
6128 },
6129 {
6120 "ld_ind: check calling conv, r1", 6130 "ld_ind: check calling conv, r1",
6121 .insns = { 6131 .insns = {
6122 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), 6132 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
@@ -6300,7 +6310,7 @@ static struct bpf_test tests[] = {
6300 BPF_EXIT_INSN(), 6310 BPF_EXIT_INSN(),
6301 }, 6311 },
6302 .fixup_map1 = { 3 }, 6312 .fixup_map1 = { 3 },
6303 .errstr = "R0 min value is negative", 6313 .errstr = "unbounded min value",
6304 .result = REJECT, 6314 .result = REJECT,
6305 }, 6315 },
6306 { 6316 {
@@ -6324,7 +6334,7 @@ static struct bpf_test tests[] = {
6324 BPF_EXIT_INSN(), 6334 BPF_EXIT_INSN(),
6325 }, 6335 },
6326 .fixup_map1 = { 3 }, 6336 .fixup_map1 = { 3 },
6327 .errstr = "R0 min value is negative", 6337 .errstr = "unbounded min value",
6328 .result = REJECT, 6338 .result = REJECT,
6329 }, 6339 },
6330 { 6340 {
@@ -6350,7 +6360,7 @@ static struct bpf_test tests[] = {
6350 BPF_EXIT_INSN(), 6360 BPF_EXIT_INSN(),
6351 }, 6361 },
6352 .fixup_map1 = { 3 }, 6362 .fixup_map1 = { 3 },
6353 .errstr = "R8 invalid mem access 'inv'", 6363 .errstr = "unbounded min value",
6354 .result = REJECT, 6364 .result = REJECT,
6355 }, 6365 },
6356 { 6366 {
@@ -6375,7 +6385,7 @@ static struct bpf_test tests[] = {
6375 BPF_EXIT_INSN(), 6385 BPF_EXIT_INSN(),
6376 }, 6386 },
6377 .fixup_map1 = { 3 }, 6387 .fixup_map1 = { 3 },
6378 .errstr = "R8 invalid mem access 'inv'", 6388 .errstr = "unbounded min value",
6379 .result = REJECT, 6389 .result = REJECT,
6380 }, 6390 },
6381 { 6391 {
@@ -6423,7 +6433,7 @@ static struct bpf_test tests[] = {
6423 BPF_EXIT_INSN(), 6433 BPF_EXIT_INSN(),
6424 }, 6434 },
6425 .fixup_map1 = { 3 }, 6435 .fixup_map1 = { 3 },
6426 .errstr = "R0 min value is negative", 6436 .errstr = "unbounded min value",
6427 .result = REJECT, 6437 .result = REJECT,
6428 }, 6438 },
6429 { 6439 {
@@ -6494,7 +6504,7 @@ static struct bpf_test tests[] = {
6494 BPF_EXIT_INSN(), 6504 BPF_EXIT_INSN(),
6495 }, 6505 },
6496 .fixup_map1 = { 3 }, 6506 .fixup_map1 = { 3 },
6497 .errstr = "R0 min value is negative", 6507 .errstr = "unbounded min value",
6498 .result = REJECT, 6508 .result = REJECT,
6499 }, 6509 },
6500 { 6510 {
@@ -6545,7 +6555,7 @@ static struct bpf_test tests[] = {
6545 BPF_EXIT_INSN(), 6555 BPF_EXIT_INSN(),
6546 }, 6556 },
6547 .fixup_map1 = { 3 }, 6557 .fixup_map1 = { 3 },
6548 .errstr = "R0 min value is negative", 6558 .errstr = "unbounded min value",
6549 .result = REJECT, 6559 .result = REJECT,
6550 }, 6560 },
6551 { 6561 {
@@ -6572,7 +6582,7 @@ static struct bpf_test tests[] = {
6572 BPF_EXIT_INSN(), 6582 BPF_EXIT_INSN(),
6573 }, 6583 },
6574 .fixup_map1 = { 3 }, 6584 .fixup_map1 = { 3 },
6575 .errstr = "R0 min value is negative", 6585 .errstr = "unbounded min value",
6576 .result = REJECT, 6586 .result = REJECT,
6577 }, 6587 },
6578 { 6588 {
@@ -6598,7 +6608,7 @@ static struct bpf_test tests[] = {
6598 BPF_EXIT_INSN(), 6608 BPF_EXIT_INSN(),
6599 }, 6609 },
6600 .fixup_map1 = { 3 }, 6610 .fixup_map1 = { 3 },
6601 .errstr = "R0 min value is negative", 6611 .errstr = "unbounded min value",
6602 .result = REJECT, 6612 .result = REJECT,
6603 }, 6613 },
6604 { 6614 {
@@ -6627,7 +6637,7 @@ static struct bpf_test tests[] = {
6627 BPF_EXIT_INSN(), 6637 BPF_EXIT_INSN(),
6628 }, 6638 },
6629 .fixup_map1 = { 3 }, 6639 .fixup_map1 = { 3 },
6630 .errstr = "R0 min value is negative", 6640 .errstr = "unbounded min value",
6631 .result = REJECT, 6641 .result = REJECT,
6632 }, 6642 },
6633 { 6643 {
@@ -6657,7 +6667,7 @@ static struct bpf_test tests[] = {
6657 BPF_JMP_IMM(BPF_JA, 0, 0, -7), 6667 BPF_JMP_IMM(BPF_JA, 0, 0, -7),
6658 }, 6668 },
6659 .fixup_map1 = { 4 }, 6669 .fixup_map1 = { 4 },
6660 .errstr = "R0 min value is negative", 6670 .errstr = "unbounded min value",
6661 .result = REJECT, 6671 .result = REJECT,
6662 }, 6672 },
6663 { 6673 {
@@ -6685,8 +6695,7 @@ static struct bpf_test tests[] = {
6685 BPF_EXIT_INSN(), 6695 BPF_EXIT_INSN(),
6686 }, 6696 },
6687 .fixup_map1 = { 3 }, 6697 .fixup_map1 = { 3 },
6688 .errstr_unpriv = "R0 pointer comparison prohibited", 6698 .errstr = "unbounded min value",
6689 .errstr = "R0 min value is negative",
6690 .result = REJECT, 6699 .result = REJECT,
6691 .result_unpriv = REJECT, 6700 .result_unpriv = REJECT,
6692 }, 6701 },
@@ -6742,6 +6751,462 @@ static struct bpf_test tests[] = {
6742 .result = REJECT, 6751 .result = REJECT,
6743 }, 6752 },
6744 { 6753 {
6754 "bounds check based on zero-extended MOV",
6755 .insns = {
6756 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6757 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6758 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6759 BPF_LD_MAP_FD(BPF_REG_1, 0),
6760 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6761 BPF_FUNC_map_lookup_elem),
6762 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6763 /* r2 = 0x0000'0000'ffff'ffff */
6764 BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
6765 /* r2 = 0 */
6766 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
6767 /* no-op */
6768 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
6769 /* access at offset 0 */
6770 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6771 /* exit */
6772 BPF_MOV64_IMM(BPF_REG_0, 0),
6773 BPF_EXIT_INSN(),
6774 },
6775 .fixup_map1 = { 3 },
6776 .result = ACCEPT
6777 },
6778 {
6779 "bounds check based on sign-extended MOV. test1",
6780 .insns = {
6781 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6782 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6783 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6784 BPF_LD_MAP_FD(BPF_REG_1, 0),
6785 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6786 BPF_FUNC_map_lookup_elem),
6787 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6788 /* r2 = 0xffff'ffff'ffff'ffff */
6789 BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
6790 /* r2 = 0xffff'ffff */
6791 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
6792 /* r0 = <oob pointer> */
6793 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
6794 /* access to OOB pointer */
6795 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6796 /* exit */
6797 BPF_MOV64_IMM(BPF_REG_0, 0),
6798 BPF_EXIT_INSN(),
6799 },
6800 .fixup_map1 = { 3 },
6801 .errstr = "map_value pointer and 4294967295",
6802 .result = REJECT
6803 },
6804 {
6805 "bounds check based on sign-extended MOV. test2",
6806 .insns = {
6807 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6808 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6809 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6810 BPF_LD_MAP_FD(BPF_REG_1, 0),
6811 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6812 BPF_FUNC_map_lookup_elem),
6813 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6814 /* r2 = 0xffff'ffff'ffff'ffff */
6815 BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
6816 /* r2 = 0xfff'ffff */
6817 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
6818 /* r0 = <oob pointer> */
6819 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
6820 /* access to OOB pointer */
6821 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6822 /* exit */
6823 BPF_MOV64_IMM(BPF_REG_0, 0),
6824 BPF_EXIT_INSN(),
6825 },
6826 .fixup_map1 = { 3 },
6827 .errstr = "R0 min value is outside of the array range",
6828 .result = REJECT
6829 },
6830 {
6831 "bounds check based on reg_off + var_off + insn_off. test1",
6832 .insns = {
6833 BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
6834 offsetof(struct __sk_buff, mark)),
6835 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6836 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6837 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6838 BPF_LD_MAP_FD(BPF_REG_1, 0),
6839 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6840 BPF_FUNC_map_lookup_elem),
6841 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6842 BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
6843 BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
6844 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
6845 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
6846 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
6847 BPF_MOV64_IMM(BPF_REG_0, 0),
6848 BPF_EXIT_INSN(),
6849 },
6850 .fixup_map1 = { 4 },
6851 .errstr = "value_size=8 off=1073741825",
6852 .result = REJECT,
6853 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
6854 },
6855 {
6856 "bounds check based on reg_off + var_off + insn_off. test2",
6857 .insns = {
6858 BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
6859 offsetof(struct __sk_buff, mark)),
6860 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6861 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6862 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6863 BPF_LD_MAP_FD(BPF_REG_1, 0),
6864 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6865 BPF_FUNC_map_lookup_elem),
6866 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6867 BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
6868 BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
6869 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
6870 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
6871 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
6872 BPF_MOV64_IMM(BPF_REG_0, 0),
6873 BPF_EXIT_INSN(),
6874 },
6875 .fixup_map1 = { 4 },
6876 .errstr = "value 1073741823",
6877 .result = REJECT,
6878 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
6879 },
6880 {
6881 "bounds check after truncation of non-boundary-crossing range",
6882 .insns = {
6883 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6884 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6885 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6886 BPF_LD_MAP_FD(BPF_REG_1, 0),
6887 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6888 BPF_FUNC_map_lookup_elem),
6889 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
6890 /* r1 = [0x00, 0xff] */
6891 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
6892 BPF_MOV64_IMM(BPF_REG_2, 1),
6893 /* r2 = 0x10'0000'0000 */
6894 BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
6895 /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
6896 BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
6897 /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
6898 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
6899 /* r1 = [0x00, 0xff] */
6900 BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
6901 /* r1 = 0 */
6902 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
6903 /* no-op */
6904 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
6905 /* access at offset 0 */
6906 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6907 /* exit */
6908 BPF_MOV64_IMM(BPF_REG_0, 0),
6909 BPF_EXIT_INSN(),
6910 },
6911 .fixup_map1 = { 3 },
6912 .result = ACCEPT
6913 },
6914 {
6915 "bounds check after truncation of boundary-crossing range (1)",
6916 .insns = {
6917 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6918 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6919 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6920 BPF_LD_MAP_FD(BPF_REG_1, 0),
6921 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6922 BPF_FUNC_map_lookup_elem),
6923 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
6924 /* r1 = [0x00, 0xff] */
6925 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
6926 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6927 /* r1 = [0xffff'ff80, 0x1'0000'007f] */
6928 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6929 /* r1 = [0xffff'ff80, 0xffff'ffff] or
6930 * [0x0000'0000, 0x0000'007f]
6931 */
6932 BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
6933 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6934 /* r1 = [0x00, 0xff] or
6935 * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
6936 */
6937 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6938 /* r1 = 0 or
6939 * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
6940 */
6941 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
6942 /* no-op or OOB pointer computation */
6943 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
6944 /* potentially OOB access */
6945 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6946 /* exit */
6947 BPF_MOV64_IMM(BPF_REG_0, 0),
6948 BPF_EXIT_INSN(),
6949 },
6950 .fixup_map1 = { 3 },
6951 /* not actually fully unbounded, but the bound is very high */
6952 .errstr = "R0 unbounded memory access",
6953 .result = REJECT
6954 },
6955 {
6956 "bounds check after truncation of boundary-crossing range (2)",
6957 .insns = {
6958 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6959 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6960 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6961 BPF_LD_MAP_FD(BPF_REG_1, 0),
6962 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6963 BPF_FUNC_map_lookup_elem),
6964 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
6965 /* r1 = [0x00, 0xff] */
6966 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
6967 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6968 /* r1 = [0xffff'ff80, 0x1'0000'007f] */
6969 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6970 /* r1 = [0xffff'ff80, 0xffff'ffff] or
6971 * [0x0000'0000, 0x0000'007f]
6972 * difference to previous test: truncation via MOV32
6973 * instead of ALU32.
6974 */
6975 BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
6976 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6977 /* r1 = [0x00, 0xff] or
6978 * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
6979 */
6980 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6981 /* r1 = 0 or
6982 * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
6983 */
6984 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
6985 /* no-op or OOB pointer computation */
6986 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
6987 /* potentially OOB access */
6988 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6989 /* exit */
6990 BPF_MOV64_IMM(BPF_REG_0, 0),
6991 BPF_EXIT_INSN(),
6992 },
6993 .fixup_map1 = { 3 },
6994 /* not actually fully unbounded, but the bound is very high */
6995 .errstr = "R0 unbounded memory access",
6996 .result = REJECT
6997 },
6998 {
6999 "bounds check after wrapping 32-bit addition",
7000 .insns = {
7001 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7002 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7003 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7004 BPF_LD_MAP_FD(BPF_REG_1, 0),
7005 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7006 BPF_FUNC_map_lookup_elem),
7007 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
7008 /* r1 = 0x7fff'ffff */
7009 BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
7010 /* r1 = 0xffff'fffe */
7011 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
7012 /* r1 = 0 */
7013 BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
7014 /* no-op */
7015 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7016 /* access at offset 0 */
7017 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
7018 /* exit */
7019 BPF_MOV64_IMM(BPF_REG_0, 0),
7020 BPF_EXIT_INSN(),
7021 },
7022 .fixup_map1 = { 3 },
7023 .result = ACCEPT
7024 },
7025 {
7026 "bounds check after shift with oversized count operand",
7027 .insns = {
7028 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7029 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7030 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7031 BPF_LD_MAP_FD(BPF_REG_1, 0),
7032 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7033 BPF_FUNC_map_lookup_elem),
7034 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
7035 BPF_MOV64_IMM(BPF_REG_2, 32),
7036 BPF_MOV64_IMM(BPF_REG_1, 1),
7037 /* r1 = (u32)1 << (u32)32 = ? */
7038 BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
7039 /* r1 = [0x0000, 0xffff] */
7040 BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
7041 /* computes unknown pointer, potentially OOB */
7042 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7043 /* potentially OOB access */
7044 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
7045 /* exit */
7046 BPF_MOV64_IMM(BPF_REG_0, 0),
7047 BPF_EXIT_INSN(),
7048 },
7049 .fixup_map1 = { 3 },
7050 .errstr = "R0 max value is outside of the array range",
7051 .result = REJECT
7052 },
7053 {
7054 "bounds check after right shift of maybe-negative number",
7055 .insns = {
7056 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7057 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7058 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7059 BPF_LD_MAP_FD(BPF_REG_1, 0),
7060 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7061 BPF_FUNC_map_lookup_elem),
7062 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
7063 /* r1 = [0x00, 0xff] */
7064 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
7065 /* r1 = [-0x01, 0xfe] */
7066 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
7067 /* r1 = 0 or 0xff'ffff'ffff'ffff */
7068 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
7069 /* r1 = 0 or 0xffff'ffff'ffff */
7070 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
7071 /* computes unknown pointer, potentially OOB */
7072 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7073 /* potentially OOB access */
7074 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
7075 /* exit */
7076 BPF_MOV64_IMM(BPF_REG_0, 0),
7077 BPF_EXIT_INSN(),
7078 },
7079 .fixup_map1 = { 3 },
7080 .errstr = "R0 unbounded memory access",
7081 .result = REJECT
7082 },
7083 {
7084 "bounds check map access with off+size signed 32bit overflow. test1",
7085 .insns = {
7086 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7087 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7088 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7089 BPF_LD_MAP_FD(BPF_REG_1, 0),
7090 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7091 BPF_FUNC_map_lookup_elem),
7092 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7093 BPF_EXIT_INSN(),
7094 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
7095 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7096 BPF_JMP_A(0),
7097 BPF_EXIT_INSN(),
7098 },
7099 .fixup_map1 = { 3 },
7100 .errstr = "map_value pointer and 2147483646",
7101 .result = REJECT
7102 },
7103 {
7104 "bounds check map access with off+size signed 32bit overflow. test2",
7105 .insns = {
7106 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7107 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7108 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7109 BPF_LD_MAP_FD(BPF_REG_1, 0),
7110 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7111 BPF_FUNC_map_lookup_elem),
7112 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7113 BPF_EXIT_INSN(),
7114 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
7115 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
7116 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
7117 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7118 BPF_JMP_A(0),
7119 BPF_EXIT_INSN(),
7120 },
7121 .fixup_map1 = { 3 },
7122 .errstr = "pointer offset 1073741822",
7123 .result = REJECT
7124 },
7125 {
7126 "bounds check map access with off+size signed 32bit overflow. test3",
7127 .insns = {
7128 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7129 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7130 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7131 BPF_LD_MAP_FD(BPF_REG_1, 0),
7132 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7133 BPF_FUNC_map_lookup_elem),
7134 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7135 BPF_EXIT_INSN(),
7136 BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
7137 BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
7138 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
7139 BPF_JMP_A(0),
7140 BPF_EXIT_INSN(),
7141 },
7142 .fixup_map1 = { 3 },
7143 .errstr = "pointer offset -1073741822",
7144 .result = REJECT
7145 },
7146 {
7147 "bounds check map access with off+size signed 32bit overflow. test4",
7148 .insns = {
7149 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7150 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7151 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7152 BPF_LD_MAP_FD(BPF_REG_1, 0),
7153 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7154 BPF_FUNC_map_lookup_elem),
7155 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7156 BPF_EXIT_INSN(),
7157 BPF_MOV64_IMM(BPF_REG_1, 1000000),
7158 BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
7159 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7160 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
7161 BPF_JMP_A(0),
7162 BPF_EXIT_INSN(),
7163 },
7164 .fixup_map1 = { 3 },
7165 .errstr = "map_value pointer and 1000000000000",
7166 .result = REJECT
7167 },
7168 {
7169 "pointer/scalar confusion in state equality check (way 1)",
7170 .insns = {
7171 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7172 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7173 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7174 BPF_LD_MAP_FD(BPF_REG_1, 0),
7175 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7176 BPF_FUNC_map_lookup_elem),
7177 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
7178 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7179 BPF_JMP_A(1),
7180 BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
7181 BPF_JMP_A(0),
7182 BPF_EXIT_INSN(),
7183 },
7184 .fixup_map1 = { 3 },
7185 .result = ACCEPT,
7186 .result_unpriv = REJECT,
7187 .errstr_unpriv = "R0 leaks addr as return value"
7188 },
7189 {
7190 "pointer/scalar confusion in state equality check (way 2)",
7191 .insns = {
7192 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7193 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7194 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7195 BPF_LD_MAP_FD(BPF_REG_1, 0),
7196 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7197 BPF_FUNC_map_lookup_elem),
7198 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
7199 BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
7200 BPF_JMP_A(1),
7201 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7202 BPF_EXIT_INSN(),
7203 },
7204 .fixup_map1 = { 3 },
7205 .result = ACCEPT,
7206 .result_unpriv = REJECT,
7207 .errstr_unpriv = "R0 leaks addr as return value"
7208 },
7209 {
6745 "variable-offset ctx access", 7210 "variable-offset ctx access",
6746 .insns = { 7211 .insns = {
6747 /* Get an unknown value */ 7212 /* Get an unknown value */
@@ -6783,6 +7248,71 @@ static struct bpf_test tests[] = {
6783 .prog_type = BPF_PROG_TYPE_LWT_IN, 7248 .prog_type = BPF_PROG_TYPE_LWT_IN,
6784 }, 7249 },
6785 { 7250 {
7251 "indirect variable-offset stack access",
7252 .insns = {
7253 /* Fill the top 8 bytes of the stack */
7254 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7255 /* Get an unknown value */
7256 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
7257 /* Make it small and 4-byte aligned */
7258 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
7259 BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
7260 /* add it to fp. We now have either fp-4 or fp-8, but
7261 * we don't know which
7262 */
7263 BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
7264 /* dereference it indirectly */
7265 BPF_LD_MAP_FD(BPF_REG_1, 0),
7266 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7267 BPF_FUNC_map_lookup_elem),
7268 BPF_MOV64_IMM(BPF_REG_0, 0),
7269 BPF_EXIT_INSN(),
7270 },
7271 .fixup_map1 = { 5 },
7272 .errstr = "variable stack read R2",
7273 .result = REJECT,
7274 .prog_type = BPF_PROG_TYPE_LWT_IN,
7275 },
7276 {
7277 "direct stack access with 32-bit wraparound. test1",
7278 .insns = {
7279 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
7280 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
7281 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
7282 BPF_MOV32_IMM(BPF_REG_0, 0),
7283 BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
7284 BPF_EXIT_INSN()
7285 },
7286 .errstr = "fp pointer and 2147483647",
7287 .result = REJECT
7288 },
7289 {
7290 "direct stack access with 32-bit wraparound. test2",
7291 .insns = {
7292 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
7293 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
7294 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
7295 BPF_MOV32_IMM(BPF_REG_0, 0),
7296 BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
7297 BPF_EXIT_INSN()
7298 },
7299 .errstr = "fp pointer and 1073741823",
7300 .result = REJECT
7301 },
7302 {
7303 "direct stack access with 32-bit wraparound. test3",
7304 .insns = {
7305 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
7306 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
7307 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
7308 BPF_MOV32_IMM(BPF_REG_0, 0),
7309 BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
7310 BPF_EXIT_INSN()
7311 },
7312 .errstr = "fp pointer offset 1073741822",
7313 .result = REJECT
7314 },
7315 {
6786 "liveness pruning and write screening", 7316 "liveness pruning and write screening",
6787 .insns = { 7317 .insns = {
6788 /* Get an unknown value */ 7318 /* Get an unknown value */
@@ -7104,6 +7634,19 @@ static struct bpf_test tests[] = {
7104 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 7634 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
7105 }, 7635 },
7106 { 7636 {
7637 "pkt_end - pkt_start is allowed",
7638 .insns = {
7639 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
7640 offsetof(struct __sk_buff, data_end)),
7641 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
7642 offsetof(struct __sk_buff, data)),
7643 BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
7644 BPF_EXIT_INSN(),
7645 },
7646 .result = ACCEPT,
7647 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
7648 },
7649 {
7107 "XDP pkt read, pkt_end mangling, bad access 1", 7650 "XDP pkt read, pkt_end mangling, bad access 1",
7108 .insns = { 7651 .insns = {
7109 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 7652 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -7118,7 +7661,7 @@ static struct bpf_test tests[] = {
7118 BPF_MOV64_IMM(BPF_REG_0, 0), 7661 BPF_MOV64_IMM(BPF_REG_0, 0),
7119 BPF_EXIT_INSN(), 7662 BPF_EXIT_INSN(),
7120 }, 7663 },
7121 .errstr = "R1 offset is outside of the packet", 7664 .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
7122 .result = REJECT, 7665 .result = REJECT,
7123 .prog_type = BPF_PROG_TYPE_XDP, 7666 .prog_type = BPF_PROG_TYPE_XDP,
7124 }, 7667 },
@@ -7137,7 +7680,7 @@ static struct bpf_test tests[] = {
7137 BPF_MOV64_IMM(BPF_REG_0, 0), 7680 BPF_MOV64_IMM(BPF_REG_0, 0),
7138 BPF_EXIT_INSN(), 7681 BPF_EXIT_INSN(),
7139 }, 7682 },
7140 .errstr = "R1 offset is outside of the packet", 7683 .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
7141 .result = REJECT, 7684 .result = REJECT,
7142 .prog_type = BPF_PROG_TYPE_XDP, 7685 .prog_type = BPF_PROG_TYPE_XDP,
7143 }, 7686 },
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index e57b4ac40e72..7177bea1fdfa 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,3 +1,4 @@
1CONFIG_USER_NS=y 1CONFIG_USER_NS=y
2CONFIG_BPF_SYSCALL=y 2CONFIG_BPF_SYSCALL=y
3CONFIG_TEST_BPF=m 3CONFIG_TEST_BPF=m
4CONFIG_NUMA=y
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 66e5ce5b91f0..1aef72df20a1 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -122,8 +122,7 @@ static void check_valid_segment(uint16_t index, int ldt,
122 * NB: Different Linux versions do different things with the 122 * NB: Different Linux versions do different things with the
123 * accessed bit in set_thread_area(). 123 * accessed bit in set_thread_area().
124 */ 124 */
125 if (ar != expected_ar && 125 if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) {
126 (ldt || ar != (expected_ar | AR_ACCESSED))) {
127 printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n", 126 printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
128 (ldt ? "LDT" : "GDT"), index, ar, expected_ar); 127 (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
129 nerrs++; 128 nerrs++;
@@ -627,13 +626,10 @@ static void do_multicpu_tests(void)
627static int finish_exec_test(void) 626static int finish_exec_test(void)
628{ 627{
629 /* 628 /*
630 * In a sensible world, this would be check_invalid_segment(0, 1); 629 * Older kernel versions did inherit the LDT on exec() which is
631 * For better or for worse, though, the LDT is inherited across exec. 630 * wrong because exec() starts from a clean state.
632 * We can probably change this safely, but for now we test it.
633 */ 631 */
634 check_valid_segment(0, 1, 632 check_invalid_segment(0, 1);
635 AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
636 42, true);
637 633
638 return nerrs ? 1 : 0; 634 return nerrs ? 1 : 0;
639} 635}
diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c
index 8a1cd1616de4..c9c81614a66a 100644
--- a/tools/usb/usbip/libsrc/vhci_driver.c
+++ b/tools/usb/usbip/libsrc/vhci_driver.c
@@ -50,14 +50,14 @@ static int parse_status(const char *value)
50 50
51 while (*c != '\0') { 51 while (*c != '\0') {
52 int port, status, speed, devid; 52 int port, status, speed, devid;
53 unsigned long socket; 53 int sockfd;
54 char lbusid[SYSFS_BUS_ID_SIZE]; 54 char lbusid[SYSFS_BUS_ID_SIZE];
55 struct usbip_imported_device *idev; 55 struct usbip_imported_device *idev;
56 char hub[3]; 56 char hub[3];
57 57
58 ret = sscanf(c, "%2s %d %d %d %x %lx %31s\n", 58 ret = sscanf(c, "%2s %d %d %d %x %u %31s\n",
59 hub, &port, &status, &speed, 59 hub, &port, &status, &speed,
60 &devid, &socket, lbusid); 60 &devid, &sockfd, lbusid);
61 61
62 if (ret < 5) { 62 if (ret < 5) {
63 dbg("sscanf failed: %d", ret); 63 dbg("sscanf failed: %d", ret);
@@ -66,7 +66,7 @@ static int parse_status(const char *value)
66 66
67 dbg("hub %s port %d status %d speed %d devid %x", 67 dbg("hub %s port %d status %d speed %d devid %x",
68 hub, port, status, speed, devid); 68 hub, port, status, speed, devid);
69 dbg("socket %lx lbusid %s", socket, lbusid); 69 dbg("sockfd %u lbusid %s", sockfd, lbusid);
70 70
71 /* if a device is connected, look at it */ 71 /* if a device is connected, look at it */
72 idev = &vhci_driver->idev[port]; 72 idev = &vhci_driver->idev[port];
@@ -106,7 +106,7 @@ static int parse_status(const char *value)
106 return 0; 106 return 0;
107} 107}
108 108
109#define MAX_STATUS_NAME 16 109#define MAX_STATUS_NAME 18
110 110
111static int refresh_imported_device_list(void) 111static int refresh_imported_device_list(void)
112{ 112{
diff --git a/tools/usb/usbip/src/utils.c b/tools/usb/usbip/src/utils.c
index 2b3d6d235015..3d7b42e77299 100644
--- a/tools/usb/usbip/src/utils.c
+++ b/tools/usb/usbip/src/utils.c
@@ -30,6 +30,7 @@ int modify_match_busid(char *busid, int add)
30 char command[SYSFS_BUS_ID_SIZE + 4]; 30 char command[SYSFS_BUS_ID_SIZE + 4];
31 char match_busid_attr_path[SYSFS_PATH_MAX]; 31 char match_busid_attr_path[SYSFS_PATH_MAX];
32 int rc; 32 int rc;
33 int cmd_size;
33 34
34 snprintf(match_busid_attr_path, sizeof(match_busid_attr_path), 35 snprintf(match_busid_attr_path, sizeof(match_busid_attr_path),
35 "%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME, 36 "%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME,
@@ -37,12 +38,14 @@ int modify_match_busid(char *busid, int add)
37 attr_name); 38 attr_name);
38 39
39 if (add) 40 if (add)
40 snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", busid); 41 cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s",
42 busid);
41 else 43 else
42 snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", busid); 44 cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s",
45 busid);
43 46
44 rc = write_sysfs_attribute(match_busid_attr_path, command, 47 rc = write_sysfs_attribute(match_busid_attr_path, command,
45 sizeof(command)); 48 cmd_size);
46 if (rc < 0) { 49 if (rc < 0) {
47 dbg("failed to write match_busid: %s", strerror(errno)); 50 dbg("failed to write match_busid: %s", strerror(errno));
48 return -1; 51 return -1;
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index 38bb171aceba..e6e81305ef46 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -16,24 +16,41 @@
16#define unlikely(x) (__builtin_expect(!!(x), 0)) 16#define unlikely(x) (__builtin_expect(!!(x), 0))
17#define likely(x) (__builtin_expect(!!(x), 1)) 17#define likely(x) (__builtin_expect(!!(x), 1))
18#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a)) 18#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))
19#define SIZE_MAX (~(size_t)0)
20
19typedef pthread_spinlock_t spinlock_t; 21typedef pthread_spinlock_t spinlock_t;
20 22
21typedef int gfp_t; 23typedef int gfp_t;
22static void *kmalloc(unsigned size, gfp_t gfp) 24#define __GFP_ZERO 0x1
23{
24 return memalign(64, size);
25}
26 25
27static void *kzalloc(unsigned size, gfp_t gfp) 26static void *kmalloc(unsigned size, gfp_t gfp)
28{ 27{
29 void *p = memalign(64, size); 28 void *p = memalign(64, size);
30 if (!p) 29 if (!p)
31 return p; 30 return p;
32 memset(p, 0, size);
33 31
32 if (gfp & __GFP_ZERO)
33 memset(p, 0, size);
34 return p; 34 return p;
35} 35}
36 36
37static inline void *kzalloc(unsigned size, gfp_t flags)
38{
39 return kmalloc(size, flags | __GFP_ZERO);
40}
41
42static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
43{
44 if (size != 0 && n > SIZE_MAX / size)
45 return NULL;
46 return kmalloc(n * size, flags);
47}
48
49static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
50{
51 return kmalloc_array(n, size, flags | __GFP_ZERO);
52}
53
37static void kfree(void *p) 54static void kfree(void *p)
38{ 55{
39 if (p) 56 if (p)
diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh
index 35b039864b77..0cf28aa6f21c 100644
--- a/tools/vm/slabinfo-gnuplot.sh
+++ b/tools/vm/slabinfo-gnuplot.sh
@@ -1,4 +1,4 @@
1#!/bin/sh 1#!/bin/bash
2 2
3# Sergey Senozhatsky, 2015 3# Sergey Senozhatsky, 2015
4# sergey.senozhatsky.work@gmail.com 4# sergey.senozhatsky.work@gmail.com
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index f9555b1e7f15..cc29a8148328 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -92,16 +92,23 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
92{ 92{
93 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; 93 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
94 struct arch_timer_context *vtimer; 94 struct arch_timer_context *vtimer;
95 u32 cnt_ctl;
95 96
96 if (!vcpu) { 97 /*
97 pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n"); 98 * We may see a timer interrupt after vcpu_put() has been called which
98 return IRQ_NONE; 99 * sets the CPU's vcpu pointer to NULL, because even though the timer
99 } 100 * has been disabled in vtimer_save_state(), the hardware interrupt
100 vtimer = vcpu_vtimer(vcpu); 101 * signal may not have been retired from the interrupt controller yet.
102 */
103 if (!vcpu)
104 return IRQ_HANDLED;
101 105
106 vtimer = vcpu_vtimer(vcpu);
102 if (!vtimer->irq.level) { 107 if (!vtimer->irq.level) {
103 vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); 108 cnt_ctl = read_sysreg_el0(cntv_ctl);
104 if (kvm_timer_irq_can_fire(vtimer)) 109 cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
110 ARCH_TIMER_CTRL_IT_MASK;
111 if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
105 kvm_timer_update_irq(vcpu, true, vtimer); 112 kvm_timer_update_irq(vcpu, true, vtimer);
106 } 113 }
107 114
@@ -355,6 +362,7 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
355 362
356 /* Disable the virtual timer */ 363 /* Disable the virtual timer */
357 write_sysreg_el0(0, cntv_ctl); 364 write_sysreg_el0(0, cntv_ctl);
365 isb();
358 366
359 vtimer->loaded = false; 367 vtimer->loaded = false;
360out: 368out:
@@ -720,7 +728,7 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
720 return 0; 728 return 0;
721} 729}
722 730
723int kvm_timer_hyp_init(void) 731int kvm_timer_hyp_init(bool has_gic)
724{ 732{
725 struct arch_timer_kvm_info *info; 733 struct arch_timer_kvm_info *info;
726 int err; 734 int err;
@@ -756,10 +764,13 @@ int kvm_timer_hyp_init(void)
756 return err; 764 return err;
757 } 765 }
758 766
759 err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus()); 767 if (has_gic) {
760 if (err) { 768 err = irq_set_vcpu_affinity(host_vtimer_irq,
761 kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); 769 kvm_get_running_vcpus());
762 goto out_free_irq; 770 if (err) {
771 kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
772 goto out_free_irq;
773 }
763 } 774 }
764 775
765 kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); 776 kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
@@ -835,10 +846,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
835no_vgic: 846no_vgic:
836 preempt_disable(); 847 preempt_disable();
837 timer->enabled = 1; 848 timer->enabled = 1;
838 if (!irqchip_in_kernel(vcpu->kvm)) 849 kvm_timer_vcpu_load(vcpu);
839 kvm_timer_vcpu_load_user(vcpu);
840 else
841 kvm_timer_vcpu_load_vgic(vcpu);
842 preempt_enable(); 850 preempt_enable();
843 851
844 return 0; 852 return 0;
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 6b60c98a6e22..2e43f9d42bd5 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1326,7 +1326,7 @@ static int init_subsystems(void)
1326 /* 1326 /*
1327 * Init HYP architected timer support 1327 * Init HYP architected timer support
1328 */ 1328 */
1329 err = kvm_timer_hyp_init(); 1329 err = kvm_timer_hyp_init(vgic_present);
1330 if (err) 1330 if (err)
1331 goto out; 1331 goto out;
1332 1332
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index b6e715fd3c90..dac7ceb1a677 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
112 } 112 }
113 113
114 trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, 114 trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
115 data); 115 &data);
116 data = vcpu_data_host_to_guest(vcpu, data, len); 116 data = vcpu_data_host_to_guest(vcpu, data, len);
117 vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); 117 vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
118 } 118 }
@@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
182 data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), 182 data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
183 len); 183 len);
184 184
185 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); 185 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
186 kvm_mmio_write_buf(data_buf, len, data); 186 kvm_mmio_write_buf(data_buf, len, data);
187 187
188 ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, 188 ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
189 data_buf); 189 data_buf);
190 } else { 190 } else {
191 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, 191 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
192 fault_ipa, 0); 192 fault_ipa, NULL);
193 193
194 ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, 194 ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
195 data_buf); 195 data_buf);
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index b36945d49986..b4b69c2d1012 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -509,8 +509,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
509 */ 509 */
510void free_hyp_pgds(void) 510void free_hyp_pgds(void)
511{ 511{
512 unsigned long addr;
513
514 mutex_lock(&kvm_hyp_pgd_mutex); 512 mutex_lock(&kvm_hyp_pgd_mutex);
515 513
516 if (boot_hyp_pgd) { 514 if (boot_hyp_pgd) {
@@ -521,10 +519,10 @@ void free_hyp_pgds(void)
521 519
522 if (hyp_pgd) { 520 if (hyp_pgd) {
523 unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); 521 unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
524 for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) 522 unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
525 unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE); 523 (uintptr_t)high_memory - PAGE_OFFSET);
526 for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) 524 unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
527 unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE); 525 VMALLOC_END - VMALLOC_START);
528 526
529 free_pages((unsigned long)hyp_pgd, hyp_pgd_order); 527 free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
530 hyp_pgd = NULL; 528 hyp_pgd = NULL;