aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-dock39
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu77
-rw-r--r--Documentation/ABI/testing/sysfs-platform-dptf40
-rw-r--r--Documentation/atomic_bitops.txt7
-rw-r--r--Documentation/devicetree/bindings/net/ti,dp83867.txt2
-rw-r--r--Documentation/devicetree/bindings/power/mti,mips-cpc.txt8
-rw-r--r--Documentation/features/sched/membarrier-sync-core/arch-support.txt62
-rw-r--r--Documentation/locking/mutex-design.txt49
-rw-r--r--Documentation/networking/segmentation-offloads.txt38
-rw-r--r--MAINTAINERS2
-rw-r--r--Makefile2
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/hugetlb.h2
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h26
-rw-r--r--arch/arm64/include/asm/mmu_context.h4
-rw-r--r--arch/arm64/include/asm/pgalloc.h44
-rw-r--r--arch/arm64/include/asm/pgtable.h23
-rw-r--r--arch/arm64/kernel/cpu_errata.c9
-rw-r--r--arch/arm64/kernel/efi.c2
-rw-r--r--arch/arm64/kernel/hibernate.c148
-rw-r--r--arch/arm64/kvm/hyp/switch.c4
-rw-r--r--arch/arm64/mm/dump.c54
-rw-r--r--arch/arm64/mm/fault.c44
-rw-r--r--arch/arm64/mm/hugetlbpage.c94
-rw-r--r--arch/arm64/mm/kasan_init.c70
-rw-r--r--arch/arm64/mm/mmu.c282
-rw-r--r--arch/arm64/mm/pageattr.c32
-rw-r--r--arch/arm64/mm/proc.S14
-rw-r--r--arch/ia64/kernel/Makefile1
-rw-r--r--arch/mips/kernel/mips-cpc.c13
-rw-r--r--arch/mips/kernel/setup.c16
-rw-r--r--arch/mips/kernel/smp-bmips.c2
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-64k.h16
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h13
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgalloc.h16
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h4
-rw-r--r--arch/powerpc/include/asm/exception-64s.h2
-rw-r--r--arch/powerpc/include/asm/hw_irq.h12
-rw-r--r--arch/powerpc/include/asm/kexec.h6
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/topology.h13
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S2
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S6
-rw-r--r--arch/powerpc/kernel/sysfs.c6
-rw-r--r--arch/powerpc/mm/drmem.c8
-rw-r--r--arch/powerpc/mm/hash64_4k.c4
-rw-r--r--arch/powerpc/mm/hash64_64k.c8
-rw-r--r--arch/powerpc/mm/hash_utils_64.c1
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c10
-rw-r--r--arch/powerpc/mm/init-common.c4
-rw-r--r--arch/powerpc/mm/numa.c5
-rw-r--r--arch/powerpc/mm/pgtable-radix.c117
-rw-r--r--arch/powerpc/mm/pgtable_64.c4
-rw-r--r--arch/powerpc/mm/tlb_hash64.c9
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c6
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c16
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c4
-rw-r--r--arch/powerpc/platforms/pseries/ras.c31
-rw-r--r--arch/powerpc/sysdev/xive/spapr.c16
-rw-r--r--arch/sparc/Kconfig2
-rw-r--r--arch/x86/.gitignore1
-rw-r--r--arch/x86/Kconfig77
-rw-r--r--arch/x86/Kconfig.cpu4
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c10
-rw-r--r--arch/x86/entry/calling.h107
-rw-r--r--arch/x86/entry/entry_64.S92
-rw-r--r--arch/x86/entry/entry_64_compat.S30
-rw-r--r--arch/x86/events/intel/core.c2
-rw-r--r--arch/x86/events/intel/lbr.c2
-rw-r--r--arch/x86/events/intel/p6.c2
-rw-r--r--arch/x86/include/asm/acpi.h2
-rw-r--r--arch/x86/include/asm/barrier.h2
-rw-r--r--arch/x86/include/asm/bug.h19
-rw-r--r--arch/x86/include/asm/cpufeature.h79
-rw-r--r--arch/x86/include/asm/nospec-branch.h14
-rw-r--r--arch/x86/include/asm/page_64.h4
-rw-r--r--arch/x86/include/asm/paravirt.h4
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_32.h2
-rw-r--r--arch/x86/include/asm/processor.h7
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/tlbflush.h27
-rw-r--r--arch/x86/kernel/amd_nb.c2
-rw-r--r--arch/x86/kernel/apic/apic.c6
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c15
-rw-r--r--arch/x86/kernel/asm-offsets_32.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c28
-rw-r--r--arch/x86/kernel/cpu/bugs.c34
-rw-r--r--arch/x86/kernel/cpu/centaur.c4
-rw-r--r--arch/x86/kernel/cpu/common.c10
-rw-r--r--arch/x86/kernel/cpu/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c31
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h15
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c19
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c6
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c4
-rw-r--r--arch/x86/kernel/cpu/proc.c8
-rw-r--r--arch/x86/kernel/head_32.S4
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/paravirt.c6
-rw-r--r--arch/x86/kernel/smpboot.c11
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kvm/mmu.c10
-rw-r--r--arch/x86/kvm/vmx.c9
-rw-r--r--arch/x86/lib/cpu.c2
-rw-r--r--arch/x86/lib/error-inject.c1
-rw-r--r--arch/x86/mm/init_64.c6
-rw-r--r--arch/x86/mm/ioremap.c2
-rw-r--r--arch/x86/mm/kmmio.c2
-rw-r--r--arch/x86/mm/pgtable_32.c2
-rw-r--r--arch/x86/mm/tlb.c6
-rw-r--r--arch/x86/platform/uv/tlb_uv.c2
-rw-r--r--arch/x86/xen/mmu_pv.c6
-rw-r--r--arch/x86/xen/smp.c2
-rw-r--r--block/blk-mq.c1
-rw-r--r--crypto/sha3_generic.c218
-rw-r--r--drivers/acpi/bus.c75
-rw-r--r--drivers/acpi/ec.c6
-rw-r--r--drivers/acpi/property.c4
-rw-r--r--drivers/acpi/spcr.c1
-rw-r--r--drivers/base/core.c3
-rw-r--r--drivers/base/power/wakeirq.c6
-rw-r--r--drivers/base/property.c5
-rw-r--r--drivers/bluetooth/ath3k.c28
-rw-r--r--drivers/bluetooth/btmrvl_main.c2
-rw-r--r--drivers/bluetooth/btrtl.c119
-rw-r--r--drivers/bluetooth/btusb.c10
-rw-r--r--drivers/bluetooth/hci_ath.c4
-rw-r--r--drivers/bluetooth/hci_ll.c2
-rw-r--r--drivers/char/hw_random/via-rng.c2
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c2
-rw-r--r--drivers/cpufreq/longhaul.c6
-rw-r--r--drivers/cpufreq/p4-clockmod.c2
-rw-r--r--drivers/cpufreq/powernow-k7.c2
-rw-r--r--drivers/cpufreq/speedstep-centrino.c4
-rw-r--r--drivers/cpufreq/speedstep-lib.c6
-rw-r--r--drivers/crypto/caam/ctrl.c8
-rw-r--r--drivers/crypto/padlock-aes.c2
-rw-r--r--drivers/crypto/sunxi-ss/sun4i-ss-prng.c6
-rw-r--r--drivers/crypto/talitos.c4
-rw-r--r--drivers/edac/amd64_edac.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c1
-rw-r--r--drivers/gpu/drm/i915/gvt/kvmgt.c51
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.c1
-rw-r--r--drivers/gpu/drm/i915/gvt/trace.h2
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c14
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h2
-rw-r--r--drivers/gpu/drm/i915/i915_gem_context.c2
-rw-r--r--drivers/gpu/drm/i915/i915_oa_cflgt3.c4
-rw-r--r--drivers/gpu/drm/i915/i915_oa_cnl.c4
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c231
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.h6
-rw-r--r--drivers/gpu/drm/i915/intel_bios.c105
-rw-r--r--drivers/gpu/drm/i915/intel_breadcrumbs.c29
-rw-r--r--drivers/gpu/drm/i915/intel_cdclk.c8
-rw-r--r--drivers/gpu/drm/i915/intel_engine_cs.c24
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.h14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c6
-rw-r--r--drivers/hwmon/coretemp.c6
-rw-r--r--drivers/hwmon/hwmon-vid.c2
-rw-r--r--drivers/hwmon/k10temp.c7
-rw-r--r--drivers/hwmon/k8temp.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_transport.c5
-rw-r--r--drivers/irqchip/irq-bcm7038-l1.c3
-rw-r--r--drivers/irqchip/irq-bcm7120-l2.c3
-rw-r--r--drivers/irqchip/irq-brcmstb-l2.c3
-rw-r--r--drivers/irqchip/irq-gic-v2m.c46
-rw-r--r--drivers/irqchip/irq-gic-v3-its-pci-msi.c2
-rw-r--r--drivers/irqchip/irq-gic-v3-its-platform-msi.c2
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c2
-rw-r--r--drivers/irqchip/irq-gic-v3.c4
-rw-r--r--drivers/irqchip/irq-mips-gic.c2
-rw-r--r--drivers/isdn/mISDN/socket.c5
-rw-r--r--drivers/macintosh/macio_asic.c1
-rw-r--r--drivers/md/dm.c3
-rw-r--r--drivers/misc/ocxl/file.c2
-rw-r--r--drivers/mmc/host/bcm2835.c3
-rw-r--r--drivers/mmc/host/meson-gx-mmc.c19
-rw-r--r--drivers/mtd/nand/Kconfig2
-rw-r--r--drivers/mtd/nand/vf610_nfc.c6
-rw-r--r--drivers/net/Kconfig3
-rw-r--r--drivers/net/Space.c3
-rw-r--r--drivers/net/dsa/mv88e6xxx/Kconfig10
-rw-r--r--drivers/net/dsa/mv88e6xxx/Makefile4
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c91
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.h103
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.c9
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.h85
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2_avb.c193
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2_scratch.c240
-rw-r--r--drivers/net/dsa/mv88e6xxx/hwtstamp.c576
-rw-r--r--drivers/net/dsa/mv88e6xxx/hwtstamp.h172
-rw-r--r--drivers/net/dsa/mv88e6xxx/ptp.c381
-rw-r--r--drivers/net/dsa/mv88e6xxx/ptp.h108
-rw-r--r--drivers/net/ethernet/8390/Makefile6
-rw-r--r--drivers/net/ethernet/8390/ax88796.c3
-rw-r--r--drivers/net/ethernet/8390/axnet_cs.c2
-rw-r--r--drivers/net/ethernet/8390/etherh.c17
-rw-r--r--drivers/net/ethernet/8390/hydra.c4
-rw-r--r--drivers/net/ethernet/8390/lib8390.c2
-rw-r--r--drivers/net/ethernet/8390/mac8390.c171
-rw-r--r--drivers/net/ethernet/8390/mcf8390.c4
-rw-r--r--drivers/net/ethernet/8390/ne.c2
-rw-r--r--drivers/net/ethernet/8390/pcnet_cs.c4
-rw-r--r--drivers/net/ethernet/8390/wd.c2
-rw-r--r--drivers/net/ethernet/8390/zorro8390.c5
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c35
-rw-r--r--drivers/net/ethernet/broadcom/tg3.h5
-rw-r--r--drivers/net/ethernet/cavium/common/cavium_ptp.c2
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c110
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.c11
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.h4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c88
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h5
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c25
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c203
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c2
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c2
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c231
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h5
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h10
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c52
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c89
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c158
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c421
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h68
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_type.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c1000
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h20
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c425
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.h67
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf.h75
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c52
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_main.c931
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c334
-rw-r--r--drivers/net/ethernet/marvell/mvpp2.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c437
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h37
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c141
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c187
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c356
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h73
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c6
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.h11
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.h1
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/match.c20
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c34
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h280
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c68
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c5
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169.c41
-rw-r--r--drivers/net/ethernet/renesas/ravb.h1
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c40
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c34
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c208
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c23
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c34
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c72
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c48
-rw-r--r--drivers/net/loopback.c1
-rw-r--r--drivers/net/phy/dp83867.c19
-rw-r--r--drivers/net/phy/phy_device.c2
-rw-r--r--drivers/net/ppp/pppoe.c6
-rw-r--r--drivers/net/ppp/pptp.c6
-rw-r--r--drivers/net/thunderbolt.c19
-rw-r--r--drivers/net/tun.c88
-rw-r--r--drivers/net/vrf.c2
-rw-r--r--drivers/nvme/host/core.c45
-rw-r--r--drivers/nvme/host/fabrics.h9
-rw-r--r--drivers/nvme/host/fc.c157
-rw-r--r--drivers/nvme/host/nvme.h3
-rw-r--r--drivers/nvme/host/pci.c39
-rw-r--r--drivers/nvme/host/rdma.c16
-rw-r--r--drivers/nvme/target/io-cmd.c7
-rw-r--r--drivers/of/property.c4
-rw-r--r--drivers/opp/cpu.c2
-rw-r--r--drivers/pci/quirks.c39
-rw-r--r--drivers/platform/x86/dell-laptop.c20
-rw-r--r--drivers/platform/x86/ideapad-laptop.c2
-rw-r--r--drivers/platform/x86/wmi.c2
-rw-r--r--drivers/s390/virtio/virtio_ccw.c29
-rw-r--r--drivers/scsi/iscsi_tcp.c14
-rw-r--r--drivers/soc/qcom/qmi_interface.c3
-rw-r--r--drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c2
-rw-r--r--drivers/staging/ipx/af_ipx.c6
-rw-r--r--drivers/staging/irda/net/af_irda.c8
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-socket.c7
-rw-r--r--drivers/target/iscsi/iscsi_target_login.c18
-rw-r--r--drivers/usb/Kconfig6
-rw-r--r--drivers/usb/host/Kconfig8
-rw-r--r--drivers/vhost/net.c7
-rw-r--r--drivers/video/fbdev/geode/video_gx.c2
-rw-r--r--drivers/xen/pvcalls-front.c197
-rw-r--r--drivers/xen/xenbus/xenbus.h1
-rw-r--r--drivers/xen/xenbus/xenbus_comms.c1
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c3
-rw-r--r--fs/btrfs/backref.c11
-rw-r--r--fs/btrfs/delayed-ref.c3
-rw-r--r--fs/btrfs/extent-tree.c4
-rw-r--r--fs/btrfs/inode.c41
-rw-r--r--fs/btrfs/qgroup.c9
-rw-r--r--fs/btrfs/tree-log.c32
-rw-r--r--fs/btrfs/volumes.c1
-rw-r--r--fs/dlm/lowcomms.c7
-rw-r--r--fs/gfs2/bmap.c43
-rw-r--r--fs/nsfs.c1
-rw-r--r--fs/ocfs2/cluster/tcp.c6
-rw-r--r--fs/proc/kcore.c4
-rw-r--r--fs/proc/proc_net.c1
-rw-r--r--include/asm-generic/bitops/lock.h3
-rw-r--r--include/dt-bindings/net/ti-dp83867.h14
-rw-r--r--include/linux/acpi.h4
-rw-r--r--include/linux/atalk.h2
-rw-r--r--include/linux/avf/virtchnl.h107
-rw-r--r--include/linux/blkdev.h2
-rw-r--r--include/linux/compiler-gcc.h7
-rw-r--r--include/linux/compiler.h4
-rw-r--r--include/linux/cpuidle.h2
-rw-r--r--include/linux/cpumask.h2
-rw-r--r--include/linux/dma-mapping.h2
-rw-r--r--include/linux/fwnode.h4
-rw-r--r--include/linux/kcore.h1
-rw-r--r--include/linux/mm_inline.h6
-rw-r--r--include/linux/net.h8
-rw-r--r--include/linux/netdevice.h6
-rw-r--r--include/linux/nospec.h36
-rw-r--r--include/linux/property.h2
-rw-r--r--include/linux/ptp_classify.h4
-rw-r--r--include/linux/ptr_ring.h9
-rw-r--r--include/linux/rtnetlink.h2
-rw-r--r--include/linux/semaphore.h2
-rw-r--r--include/linux/skbuff.h5
-rw-r--r--include/linux/socket.h2
-rw-r--r--include/net/Space.h1
-rw-r--r--include/net/act_api.h17
-rw-r--r--include/net/ax25.h2
-rw-r--r--include/net/devlink.h4
-rw-r--r--include/net/dsa.h20
-rw-r--r--include/net/fib_rules.h3
-rw-r--r--include/net/inet_common.h2
-rw-r--r--include/net/ipv6.h2
-rw-r--r--include/net/lwtunnel.h15
-rw-r--r--include/net/net_namespace.h16
-rw-r--r--include/net/route.h2
-rw-r--r--include/net/sock.h12
-rw-r--r--include/net/tcp.h3
-rw-r--r--include/net/tcp_states.h26
-rw-r--r--include/net/udplite.h1
-rw-r--r--include/sound/ac97/regs.h2
-rw-r--r--include/trace/events/xen.h2
-rw-r--r--include/uapi/linux/errqueue.h2
-rw-r--r--include/uapi/linux/fib_rules.h2
-rw-r--r--include/uapi/linux/if_ether.h6
-rw-r--r--include/uapi/linux/if_link.h18
-rw-r--r--include/uapi/linux/libc-compat.h6
-rw-r--r--include/uapi/linux/pkt_cls.h3
-rw-r--r--include/uapi/linux/rds.h1
-rw-r--r--include/uapi/linux/tc_ematch/tc_em_ipt.h20
-rw-r--r--kernel/audit.c1
-rw-r--r--kernel/irq/irqdomain.c18
-rw-r--r--kernel/kprobes.c178
-rw-r--r--kernel/locking/qspinlock.c21
-rw-r--r--kernel/sched/core.c27
-rw-r--r--kernel/sched/cpufreq_schedutil.c2
-rw-r--r--kernel/sched/deadline.c6
-rw-r--r--kernel/sched/rt.c3
-rw-r--r--lib/dma-direct.c5
-rw-r--r--lib/kobject_uevent.c1
-rw-r--r--mm/memory-failure.c2
-rw-r--r--mm/memory.c2
-rw-r--r--net/9p/trans_virtio.c3
-rw-r--r--net/appletalk/ddp.c5
-rw-r--r--net/atm/pvc.c5
-rw-r--r--net/atm/svc.c5
-rw-r--r--net/ax25/af_ax25.c4
-rw-r--r--net/bluetooth/hci_request.c6
-rw-r--r--net/bluetooth/hci_sock.c4
-rw-r--r--net/bluetooth/l2cap_sock.c5
-rw-r--r--net/bluetooth/rfcomm/sock.c5
-rw-r--r--net/bluetooth/sco.c5
-rw-r--r--net/bridge/br_sysfs_if.c3
-rw-r--r--net/can/raw.c6
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/devlink.c32
-rw-r--r--net/core/fib_notifier.c1
-rw-r--r--net/core/fib_rules.c8
-rw-r--r--net/core/net-procfs.c2
-rw-r--r--net/core/net_namespace.c134
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/core/skbuff.c6
-rw-r--r--net/core/sock.c34
-rw-r--r--net/core/sock_diag.c1
-rw-r--r--net/core/sysctl_net_core.c1
-rw-r--r--net/decnet/af_decnet.c68
-rw-r--r--net/dsa/dsa.c36
-rw-r--r--net/dsa/slave.c59
-rw-r--r--net/ipv4/af_inet.c7
-rw-r--r--net/ipv4/arp.c1
-rw-r--r--net/ipv4/devinet.c1
-rw-r--r--net/ipv4/fib_frontend.c1
-rw-r--r--net/ipv4/fib_semantics.c17
-rw-r--r--net/ipv4/icmp.c1
-rw-r--r--net/ipv4/igmp.c1
-rw-r--r--net/ipv4/ip_fragment.c1
-rw-r--r--net/ipv4/ip_sockglue.c6
-rw-r--r--net/ipv4/ipmr.c1
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/iptable_filter.c1
-rw-r--r--net/ipv4/ping.c1
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/raw.c1
-rw-r--r--net/ipv4/route.c31
-rw-r--r--net/ipv4/sysctl_net_ipv4.c1
-rw-r--r--net/ipv4/tcp.c57
-rw-r--r--net/ipv4/tcp_input.c7
-rw-r--r--net/ipv4/tcp_ipv4.c28
-rw-r--r--net/ipv4/tcp_metrics.c1
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/tcp_output.c65
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv4/udplite.c1
-rw-r--r--net/ipv4/xfrm4_policy.c2
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/addrlabel.c1
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/fib6_rules.c1
-rw-r--r--net/ipv6/icmp.c1
-rw-r--r--net/ipv6/ip6_checksum.c5
-rw-r--r--net/ipv6/ip6_fib.c1
-rw-r--r--net/ipv6/ip6_flowlabel.c1
-rw-r--r--net/ipv6/ip6mr.c1
-rw-r--r--net/ipv6/mcast.c1
-rw-r--r--net/ipv6/ndisc.c1
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/proc.c1
-rw-r--r--net/ipv6/raw.c1
-rw-r--r--net/ipv6/reassembly.c1
-rw-r--r--net/ipv6/route.c12
-rw-r--r--net/ipv6/seg6.c1
-rw-r--r--net/ipv6/sysctl_net_ipv6.c1
-rw-r--r--net/ipv6/tcp_ipv6.c14
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_policy.c1
-rw-r--r--net/iucv/af_iucv.c5
-rw-r--r--net/kcm/kcmsock.c1
-rw-r--r--net/l2tp/l2tp_ip.c5
-rw-r--r--net/l2tp/l2tp_ip6.c5
-rw-r--r--net/l2tp/l2tp_ppp.c5
-rw-r--r--net/llc/af_llc.c5
-rw-r--r--net/netfilter/core.c1
-rw-r--r--net/netfilter/nf_log.c1
-rw-r--r--net/netfilter/x_tables.c1
-rw-r--r--net/netlink/af_netlink.c7
-rw-r--r--net/netlink/genetlink.c1
-rw-r--r--net/netrom/af_netrom.c9
-rw-r--r--net/nfc/llcp_commands.c4
-rw-r--r--net/nfc/llcp_sock.c5
-rw-r--r--net/nfc/netlink.c3
-rw-r--r--net/packet/af_packet.c11
-rw-r--r--net/phonet/socket.c5
-rw-r--r--net/qrtr/qrtr.c5
-rw-r--r--net/rds/af_rds.c7
-rw-r--r--net/rds/connection.c2
-rw-r--r--net/rds/message.c132
-rw-r--r--net/rds/rds.h17
-rw-r--r--net/rds/recv.c2
-rw-r--r--net/rds/send.c53
-rw-r--r--net/rds/tcp.c7
-rw-r--r--net/rose/af_rose.c5
-rw-r--r--net/rxrpc/recvmsg.c5
-rw-r--r--net/sched/Kconfig12
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c140
-rw-r--r--net/sched/act_bpf.c10
-rw-r--r--net/sched/act_connmark.c11
-rw-r--r--net/sched/act_csum.c10
-rw-r--r--net/sched/act_gact.c10
-rw-r--r--net/sched/act_ife.c10
-rw-r--r--net/sched/act_ipt.c20
-rw-r--r--net/sched/act_mirred.c25
-rw-r--r--net/sched/act_nat.c11
-rw-r--r--net/sched/act_pedit.c10
-rw-r--r--net/sched/act_police.c11
-rw-r--r--net/sched/act_sample.c10
-rw-r--r--net/sched/act_simple.c10
-rw-r--r--net/sched/act_skbedit.c10
-rw-r--r--net/sched/act_skbmod.c10
-rw-r--r--net/sched/act_tunnel_key.c10
-rw-r--r--net/sched/act_vlan.c10
-rw-r--r--net/sched/cls_api.c30
-rw-r--r--net/sched/cls_u32.c24
-rw-r--r--net/sched/em_ipt.c257
-rw-r--r--net/sched/sch_api.c1
-rw-r--r--net/sctp/Makefile2
-rw-r--r--net/sctp/debug.c6
-rw-r--r--net/sctp/diag.c (renamed from net/sctp/sctp_diag.c)31
-rw-r--r--net/sctp/input.c5
-rw-r--r--net/sctp/ipv6.c8
-rw-r--r--net/sctp/stream.c2
-rw-r--r--net/sctp/stream_interleave.c16
-rw-r--r--net/smc/af_smc.c11
-rw-r--r--net/socket.c39
-rw-r--r--net/sunrpc/clnt.c6
-rw-r--r--net/sunrpc/svcsock.c13
-rw-r--r--net/sunrpc/xprtsock.c3
-rw-r--r--net/sysctl_net.c1
-rw-r--r--net/tipc/Makefile2
-rw-r--r--net/tipc/bearer.c90
-rw-r--r--net/tipc/bearer.h4
-rw-r--r--net/tipc/core.h6
-rw-r--r--net/tipc/group.c2
-rw-r--r--net/tipc/link.c3
-rw-r--r--net/tipc/name_table.c73
-rw-r--r--net/tipc/name_table.h2
-rw-r--r--net/tipc/net.c15
-rw-r--r--net/tipc/net.h1
-rw-r--r--net/tipc/netlink_compat.c43
-rw-r--r--net/tipc/node.c24
-rw-r--r--net/tipc/node.h1
-rw-r--r--net/tipc/server.c710
-rw-r--r--net/tipc/socket.c5
-rw-r--r--net/tipc/subscr.c361
-rw-r--r--net/tipc/subscr.h66
-rw-r--r--net/tipc/topsrv.c703
-rw-r--r--net/tipc/topsrv.h (renamed from net/tipc/server.h)57
-rw-r--r--net/tls/tls_main.c7
-rw-r--r--net/unix/af_unix.c13
-rw-r--r--net/vmw_vsock/af_vsock.c4
-rw-r--r--net/wireless/core.c1
-rw-r--r--net/wireless/wext-core.c1
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/x25/x25_subr.c3
-rw-r--r--net/xfrm/xfrm_policy.c1
-rw-r--r--security/tomoyo/network.c5
-rw-r--r--sound/ac97/Kconfig1
-rw-r--r--sound/core/seq/seq_clientmgr.c8
-rw-r--r--sound/pci/hda/patch_realtek.c72
-rw-r--r--sound/usb/mixer.c18
-rw-r--r--sound/usb/pcm.c9
-rw-r--r--sound/usb/quirks.c7
-rw-r--r--tools/arch/powerpc/include/uapi/asm/kvm.h2
-rw-r--r--tools/arch/s390/include/uapi/asm/unistd.h412
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h1
-rw-r--r--tools/include/uapi/drm/i915_drm.h77
-rw-r--r--tools/include/uapi/linux/if_link.h1
-rw-r--r--tools/include/uapi/linux/kvm.h90
-rw-r--r--tools/objtool/check.c53
-rw-r--r--tools/objtool/check.h1
-rw-r--r--tools/perf/Documentation/perf-data.txt4
-rw-r--r--tools/perf/arch/s390/Makefile10
-rwxr-xr-xtools/perf/arch/s390/entry/syscalls/mksyscalltbl18
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl390
-rw-r--r--tools/perf/builtin-c2c.c4
-rw-r--r--tools/perf/builtin-report.c3
-rw-r--r--tools/perf/builtin-top.c150
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json27
-rw-r--r--tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json22
-rw-r--r--tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json27
-rw-r--r--tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json22
-rw-r--r--tools/perf/pmu-events/arch/arm64/cortex-a53/other.json32
-rw-r--r--tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json52
-rw-r--r--tools/perf/pmu-events/arch/arm64/mapfile.csv1
-rw-r--r--tools/perf/tests/backward-ring-buffer.c7
-rwxr-xr-xtools/perf/tests/shell/trace+probe_libc_inet_pton.sh23
-rw-r--r--tools/perf/ui/browsers/hists.c38
-rw-r--r--tools/perf/ui/browsers/hists.h3
-rw-r--r--tools/perf/util/evlist.c17
-rw-r--r--tools/perf/util/evlist.h4
-rw-r--r--tools/perf/util/evsel.c12
-rw-r--r--tools/perf/util/evsel.h14
-rw-r--r--tools/perf/util/hist.h6
-rw-r--r--tools/perf/util/mmap.c141
-rw-r--r--tools/perf/util/mmap.h10
-rw-r--r--tools/perf/util/util.c24
-rwxr-xr-xtools/testing/selftests/net/fib-onlink-tests.sh375
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh483
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c133
-rw-r--r--tools/testing/selftests/net/psock_fanout.c3
-rw-r--r--tools/testing/selftests/powerpc/alignment/alignment_handler.c2
-rw-r--r--tools/testing/selftests/tc-testing/README173
-rw-r--r--tools/testing/selftests/tc-testing/TODO.txt25
-rw-r--r--tools/testing/selftests/tc-testing/TdcPlugin.py74
-rw-r--r--tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt104
-rw-r--r--tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt35
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS27
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py141
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py19
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py142
-rw-r--r--tools/testing/selftests/tc-testing/plugins/__init__.py0
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py486
-rw-r--r--tools/testing/selftests/tc-testing/tdc_helper.py15
-rw-r--r--tools/testing/selftests/x86/Makefile24
-rw-r--r--tools/testing/selftests/x86/mpx-mini-test.c32
-rw-r--r--tools/testing/selftests/x86/protection_keys.c28
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c5
-rw-r--r--tools/testing/selftests/x86/test_mremap_vdso.c4
-rw-r--r--tools/testing/selftests/x86/test_vdso.c55
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c11
612 files changed, 15332 insertions, 6958 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-dock b/Documentation/ABI/testing/sysfs-devices-platform-dock
new file mode 100644
index 000000000000..1d8c18f905c7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-platform-dock
@@ -0,0 +1,39 @@
1What: /sys/devices/platform/dock.N/docked
2Date: Dec, 2006
3KernelVersion: 2.6.19
4Contact: linux-acpi@vger.kernel.org
5Description:
6 (RO) Value 1 or 0 indicates whether the software believes the
7 laptop is docked in a docking station.
8
9What: /sys/devices/platform/dock.N/undock
10Date: Dec, 2006
11KernelVersion: 2.6.19
12Contact: linux-acpi@vger.kernel.org
13Description:
14 (WO) Writing to this file causes the software to initiate an
15 undock request to the firmware.
16
17What: /sys/devices/platform/dock.N/uid
18Date: Feb, 2007
19KernelVersion: v2.6.21
20Contact: linux-acpi@vger.kernel.org
21Description:
22 (RO) Displays the docking station the laptop is docked to.
23
24What: /sys/devices/platform/dock.N/flags
25Date: May, 2007
26KernelVersion: v2.6.21
27Contact: linux-acpi@vger.kernel.org
28Description:
29 (RO) Show dock station flags, useful for checking if undock
30 request has been made by the user (from the immediate_undock
31 option).
32
33What: /sys/devices/platform/dock.N/type
34Date: Aug, 2008
35KernelVersion: v2.6.27
36Contact: linux-acpi@vger.kernel.org
37Description:
38 (RO) Display the dock station type- dock_station, ata_bay or
39 battery_bay.
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index bfd29bc8d37a..4ed63b6cfb15 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -108,6 +108,8 @@ Description: CPU topology files that describe a logical CPU's relationship
108 108
109What: /sys/devices/system/cpu/cpuidle/current_driver 109What: /sys/devices/system/cpu/cpuidle/current_driver
110 /sys/devices/system/cpu/cpuidle/current_governer_ro 110 /sys/devices/system/cpu/cpuidle/current_governer_ro
111 /sys/devices/system/cpu/cpuidle/available_governors
112 /sys/devices/system/cpu/cpuidle/current_governor
111Date: September 2007 113Date: September 2007
112Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> 114Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
113Description: Discover cpuidle policy and mechanism 115Description: Discover cpuidle policy and mechanism
@@ -119,13 +121,84 @@ Description: Discover cpuidle policy and mechanism
119 Idle policy (governor) is differentiated from idle mechanism 121 Idle policy (governor) is differentiated from idle mechanism
120 (driver) 122 (driver)
121 123
122 current_driver: displays current idle mechanism 124 current_driver: (RO) displays current idle mechanism
123 125
124 current_governor_ro: displays current idle policy 126 current_governor_ro: (RO) displays current idle policy
127
128 With the cpuidle_sysfs_switch boot option enabled (meant for
129 developer testing), the following three attributes are visible
130 instead:
131
132 current_driver: same as described above
133
134 available_governors: (RO) displays a space separated list of
135 available governors
136
137 current_governor: (RW) displays current idle policy. Users can
138 switch the governor at runtime by writing to this file.
125 139
126 See files in Documentation/cpuidle/ for more information. 140 See files in Documentation/cpuidle/ for more information.
127 141
128 142
143What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/name
144 /sys/devices/system/cpu/cpuX/cpuidle/stateN/latency
145 /sys/devices/system/cpu/cpuX/cpuidle/stateN/power
146 /sys/devices/system/cpu/cpuX/cpuidle/stateN/time
147 /sys/devices/system/cpu/cpuX/cpuidle/stateN/usage
148Date: September 2007
149KernelVersion: v2.6.24
150Contact: Linux power management list <linux-pm@vger.kernel.org>
151Description:
152 The directory /sys/devices/system/cpu/cpuX/cpuidle contains per
153 logical CPU specific cpuidle information for each online cpu X.
154 The processor idle states which are available for use have the
155 following attributes:
156
157 name: (RO) Name of the idle state (string).
158
159 latency: (RO) The latency to exit out of this idle state (in
160 microseconds).
161
162 power: (RO) The power consumed while in this idle state (in
163 milliwatts).
164
165 time: (RO) The total time spent in this idle state (in microseconds).
166
167 usage: (RO) Number of times this state was entered (a count).
168
169
170What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/desc
171Date: February 2008
172KernelVersion: v2.6.25
173Contact: Linux power management list <linux-pm@vger.kernel.org>
174Description:
175 (RO) A small description about the idle state (string).
176
177
178What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/disable
179Date: March 2012
180KernelVersion: v3.10
181Contact: Linux power management list <linux-pm@vger.kernel.org>
182Description:
183 (RW) Option to disable this idle state (bool). The behavior and
184 the effect of the disable variable depends on the implementation
185 of a particular governor. In the ladder governor, for example,
186 it is not coherent, i.e. if one is disabling a light state, then
187 all deeper states are disabled as well, but the disable variable
188 does not reflect it. Likewise, if one enables a deep state but a
189 lighter state still is disabled, then this has no effect.
190
191
192What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/residency
193Date: March 2014
194KernelVersion: v3.15
195Contact: Linux power management list <linux-pm@vger.kernel.org>
196Description:
197 (RO) Display the target residency i.e. the minimum amount of
198 time (in microseconds) this cpu should spend in this idle state
199 to make the transition worth the effort.
200
201
129What: /sys/devices/system/cpu/cpu#/cpufreq/* 202What: /sys/devices/system/cpu/cpu#/cpufreq/*
130Date: pre-git history 203Date: pre-git history
131Contact: linux-pm@vger.kernel.org 204Contact: linux-pm@vger.kernel.org
diff --git a/Documentation/ABI/testing/sysfs-platform-dptf b/Documentation/ABI/testing/sysfs-platform-dptf
new file mode 100644
index 000000000000..325dc0667dbb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-dptf
@@ -0,0 +1,40 @@
1What: /sys/bus/platform/devices/INT3407:00/dptf_power/charger_type
2Date: Jul, 2016
3KernelVersion: v4.10
4Contact: linux-acpi@vger.kernel.org
5Description:
6 (RO) The charger type - Traditional, Hybrid or NVDC.
7
8What: /sys/bus/platform/devices/INT3407:00/dptf_power/adapter_rating_mw
9Date: Jul, 2016
10KernelVersion: v4.10
11Contact: linux-acpi@vger.kernel.org
12Description:
13 (RO) Adapter rating in milliwatts (the maximum Adapter power).
14 Must be 0 if no AC Adaptor is plugged in.
15
16What: /sys/bus/platform/devices/INT3407:00/dptf_power/max_platform_power_mw
17Date: Jul, 2016
18KernelVersion: v4.10
19Contact: linux-acpi@vger.kernel.org
20Description:
21 (RO) Maximum platform power that can be supported by the battery
22 in milliwatts.
23
24What: /sys/bus/platform/devices/INT3407:00/dptf_power/platform_power_source
25Date: Jul, 2016
26KernelVersion: v4.10
27Contact: linux-acpi@vger.kernel.org
28Description:
29 (RO) Display the platform power source
30 0x00 = DC
31 0x01 = AC
32 0x02 = USB
33 0x03 = Wireless Charger
34
35What: /sys/bus/platform/devices/INT3407:00/dptf_power/battery_steady_power
36Date: Jul, 2016
37KernelVersion: v4.10
38Contact: linux-acpi@vger.kernel.org
39Description:
40 (RO) The maximum sustained power for battery in milliwatts.
diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt
index 5550bfdcce5f..be70b32c95d9 100644
--- a/Documentation/atomic_bitops.txt
+++ b/Documentation/atomic_bitops.txt
@@ -58,7 +58,12 @@ Like with atomic_t, the rule of thumb is:
58 58
59 - RMW operations that have a return value are fully ordered. 59 - RMW operations that have a return value are fully ordered.
60 60
61Except for test_and_set_bit_lock() which has ACQUIRE semantics and 61 - RMW operations that are conditional are unordered on FAILURE,
62 otherwise the above rules apply. In the case of test_and_{}_bit() operations,
63 if the bit in memory is unchanged by the operation then it is deemed to have
64 failed.
65
66Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and
62clear_bit_unlock() which has RELEASE semantics. 67clear_bit_unlock() which has RELEASE semantics.
63 68
64Since a platform only has a single means of achieving atomic operations 69Since a platform only has a single means of achieving atomic operations
diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt
index 02c4353b5cf2..9ef9338aaee1 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt
@@ -25,6 +25,8 @@ Optional property:
25 software needs to take when this pin is 25 software needs to take when this pin is
26 strapped in these modes. See data manual 26 strapped in these modes. See data manual
27 for details. 27 for details.
28 - ti,clk-output-sel - Muxing option for CLK_OUT pin - see dt-bindings/net/ti-dp83867.h
29 for applicable values.
28 30
29Note: ti,min-output-impedance and ti,max-output-impedance are mutually 31Note: ti,min-output-impedance and ti,max-output-impedance are mutually
30 exclusive. When both properties are present ti,max-output-impedance 32 exclusive. When both properties are present ti,max-output-impedance
diff --git a/Documentation/devicetree/bindings/power/mti,mips-cpc.txt b/Documentation/devicetree/bindings/power/mti,mips-cpc.txt
new file mode 100644
index 000000000000..c6b82511ae8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/mti,mips-cpc.txt
@@ -0,0 +1,8 @@
1Binding for MIPS Cluster Power Controller (CPC).
2
3This binding allows a system to specify where the CPC registers are
4located.
5
6Required properties:
7compatible : Should be "mti,mips-cpc".
8regs: Should describe the address & size of the CPC register region.
diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
new file mode 100644
index 000000000000..2c815a7f1ba7
--- /dev/null
+++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
@@ -0,0 +1,62 @@
1#
2# Feature name: membarrier-sync-core
3# Kconfig: ARCH_HAS_MEMBARRIER_SYNC_CORE
4# description: arch supports core serializing membarrier
5#
6# Architecture requirements
7#
8# * arm64
9#
10# Rely on eret context synchronization when returning from IPI handler, and
11# when returning to user-space.
12#
13# * x86
14#
15# x86-32 uses IRET as return from interrupt, which takes care of the IPI.
16# However, it uses both IRET and SYSEXIT to go back to user-space. The IRET
17# instruction is core serializing, but not SYSEXIT.
18#
19# x86-64 uses IRET as return from interrupt, which takes care of the IPI.
20# However, it can return to user-space through either SYSRETL (compat code),
21# SYSRETQ, or IRET.
22#
23# Given that neither SYSRET{L,Q}, nor SYSEXIT, are core serializing, we rely
24# instead on write_cr3() performed by switch_mm() to provide core serialization
25# after changing the current mm, and deal with the special case of kthread ->
26# uthread (temporarily keeping current mm into active_mm) by issuing a
27# sync_core_before_usermode() in that specific case.
28#
29 -----------------------
30 | arch |status|
31 -----------------------
32 | alpha: | TODO |
33 | arc: | TODO |
34 | arm: | TODO |
35 | arm64: | ok |
36 | blackfin: | TODO |
37 | c6x: | TODO |
38 | cris: | TODO |
39 | frv: | TODO |
40 | h8300: | TODO |
41 | hexagon: | TODO |
42 | ia64: | TODO |
43 | m32r: | TODO |
44 | m68k: | TODO |
45 | metag: | TODO |
46 | microblaze: | TODO |
47 | mips: | TODO |
48 | mn10300: | TODO |
49 | nios2: | TODO |
50 | openrisc: | TODO |
51 | parisc: | TODO |
52 | powerpc: | TODO |
53 | s390: | TODO |
54 | score: | TODO |
55 | sh: | TODO |
56 | sparc: | TODO |
57 | tile: | TODO |
58 | um: | TODO |
59 | unicore32: | TODO |
60 | x86: | ok |
61 | xtensa: | TODO |
62 -----------------------
diff --git a/Documentation/locking/mutex-design.txt b/Documentation/locking/mutex-design.txt
index 60c482df1a38..818aca19612f 100644
--- a/Documentation/locking/mutex-design.txt
+++ b/Documentation/locking/mutex-design.txt
@@ -21,37 +21,23 @@ Implementation
21-------------- 21--------------
22 22
23Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h 23Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
24and implemented in kernel/locking/mutex.c. These locks use a three 24and implemented in kernel/locking/mutex.c. These locks use an atomic variable
25state atomic counter (->count) to represent the different possible 25(->owner) to keep track of the lock state during its lifetime. Field owner
26transitions that can occur during the lifetime of a lock: 26actually contains 'struct task_struct *' to the current lock owner and it is
27 27therefore NULL if not currently owned. Since task_struct pointers are aligned
28 1: unlocked 28at at least L1_CACHE_BYTES, low bits (3) are used to store extra state (e.g.,
29 0: locked, no waiters 29if waiter list is non-empty). In its most basic form it also includes a
30 negative: locked, with potential waiters 30wait-queue and a spinlock that serializes access to it. Furthermore,
31 31CONFIG_MUTEX_SPIN_ON_OWNER=y systems use a spinner MCS lock (->osq), described
32In its most basic form it also includes a wait-queue and a spinlock 32below in (ii).
33that serializes access to it. CONFIG_SMP systems can also include
34a pointer to the lock task owner (->owner) as well as a spinner MCS
35lock (->osq), both described below in (ii).
36 33
37When acquiring a mutex, there are three possible paths that can be 34When acquiring a mutex, there are three possible paths that can be
38taken, depending on the state of the lock: 35taken, depending on the state of the lock:
39 36
40(i) fastpath: tries to atomically acquire the lock by decrementing the 37(i) fastpath: tries to atomically acquire the lock by cmpxchg()ing the owner with
41 counter. If it was already taken by another task it goes to the next 38 the current task. This only works in the uncontended case (cmpxchg() checks
42 possible path. This logic is architecture specific. On x86-64, the 39 against 0UL, so all 3 state bits above have to be 0). If the lock is
43 locking fastpath is 2 instructions: 40 contended it goes to the next possible path.
44
45 0000000000000e10 <mutex_lock>:
46 e21: f0 ff 0b lock decl (%rbx)
47 e24: 79 08 jns e2e <mutex_lock+0x1e>
48
49 the unlocking fastpath is equally tight:
50
51 0000000000000bc0 <mutex_unlock>:
52 bc8: f0 ff 07 lock incl (%rdi)
53 bcb: 7f 0a jg bd7 <mutex_unlock+0x17>
54
55 41
56(ii) midpath: aka optimistic spinning, tries to spin for acquisition 42(ii) midpath: aka optimistic spinning, tries to spin for acquisition
57 while the lock owner is running and there are no other tasks ready 43 while the lock owner is running and there are no other tasks ready
@@ -143,11 +129,10 @@ Test if the mutex is taken:
143Disadvantages 129Disadvantages
144------------- 130-------------
145 131
146Unlike its original design and purpose, 'struct mutex' is larger than 132Unlike its original design and purpose, 'struct mutex' is among the largest
147most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice 133locks in the kernel. E.g: on x86-64 it is 32 bytes, where 'struct semaphore'
148as large as 'struct semaphore' (24 bytes) and tied, along with rwsems, 134is 24 bytes and rw_semaphore is 40 bytes. Larger structure sizes mean more CPU
149for the largest lock in the kernel. Larger structure sizes mean more 135cache and memory footprint.
150CPU cache and memory footprint.
151 136
152When to use mutexes 137When to use mutexes
153------------------- 138-------------------
diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt
index 2f09455a993a..d47480b61ac6 100644
--- a/Documentation/networking/segmentation-offloads.txt
+++ b/Documentation/networking/segmentation-offloads.txt
@@ -13,6 +13,7 @@ The following technologies are described:
13 * Generic Segmentation Offload - GSO 13 * Generic Segmentation Offload - GSO
14 * Generic Receive Offload - GRO 14 * Generic Receive Offload - GRO
15 * Partial Generic Segmentation Offload - GSO_PARTIAL 15 * Partial Generic Segmentation Offload - GSO_PARTIAL
16 * SCTP accelleration with GSO - GSO_BY_FRAGS
16 17
17TCP Segmentation Offload 18TCP Segmentation Offload
18======================== 19========================
@@ -49,6 +50,10 @@ datagram into multiple IPv4 fragments. Many of the requirements for UDP
49fragmentation offload are the same as TSO. However the IPv4 ID for 50fragmentation offload are the same as TSO. However the IPv4 ID for
50fragments should not increment as a single IPv4 datagram is fragmented. 51fragments should not increment as a single IPv4 datagram is fragmented.
51 52
53UFO is deprecated: modern kernels will no longer generate UFO skbs, but can
54still receive them from tuntap and similar devices. Offload of UDP-based
55tunnel protocols is still supported.
56
52IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads 57IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads
53======================================================== 58========================================================
54 59
@@ -83,10 +88,10 @@ SKB_GSO_UDP_TUNNEL_CSUM. These two additional tunnel types reflect the
83fact that the outer header also requests to have a non-zero checksum 88fact that the outer header also requests to have a non-zero checksum
84included in the outer header. 89included in the outer header.
85 90
86Finally there is SKB_GSO_REMCSUM which indicates that a given tunnel header 91Finally there is SKB_GSO_TUNNEL_REMCSUM which indicates that a given tunnel
87has requested a remote checksum offload. In this case the inner headers 92header has requested a remote checksum offload. In this case the inner
88will be left with a partial checksum and only the outer header checksum 93headers will be left with a partial checksum and only the outer header
89will be computed. 94checksum will be computed.
90 95
91Generic Segmentation Offload 96Generic Segmentation Offload
92============================ 97============================
@@ -128,3 +133,28 @@ values for if the header was simply duplicated. The one exception to this
128is the outer IPv4 ID field. It is up to the device drivers to guarantee 133is the outer IPv4 ID field. It is up to the device drivers to guarantee
129that the IPv4 ID field is incremented in the case that a given header does 134that the IPv4 ID field is incremented in the case that a given header does
130not have the DF bit set. 135not have the DF bit set.
136
137SCTP accelleration with GSO
138===========================
139
140SCTP - despite the lack of hardware support - can still take advantage of
141GSO to pass one large packet through the network stack, rather than
142multiple small packets.
143
144This requires a different approach to other offloads, as SCTP packets
145cannot be just segmented to (P)MTU. Rather, the chunks must be contained in
146IP segments, padding respected. So unlike regular GSO, SCTP can't just
147generate a big skb, set gso_size to the fragmentation point and deliver it
148to IP layer.
149
150Instead, the SCTP protocol layer builds an skb with the segments correctly
151padded and stored as chained skbs, and skb_segment() splits based on those.
152To signal this, gso_size is set to the special value GSO_BY_FRAGS.
153
154Therefore, any code in the core networking stack must be aware of the
155possibility that gso_size will be GSO_BY_FRAGS and handle that case
156appropriately. (For size checks, the skb_gso_validate_*_len family of
157helpers do this automatically.)
158
159This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits
160set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE.
diff --git a/MAINTAINERS b/MAINTAINERS
index 3bdc260e36b7..9a7f76eadae9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9206,6 +9206,7 @@ MIPS GENERIC PLATFORM
9206M: Paul Burton <paul.burton@mips.com> 9206M: Paul Burton <paul.burton@mips.com>
9207L: linux-mips@linux-mips.org 9207L: linux-mips@linux-mips.org
9208S: Supported 9208S: Supported
9209F: Documentation/devicetree/bindings/power/mti,mips-cpc.txt
9209F: arch/mips/generic/ 9210F: arch/mips/generic/
9210F: arch/mips/tools/generic-board-config.sh 9211F: arch/mips/tools/generic-board-config.sh
9211 9212
@@ -9945,6 +9946,7 @@ F: drivers/nfc/nxp-nci
9945 9946
9946OBJTOOL 9947OBJTOOL
9947M: Josh Poimboeuf <jpoimboe@redhat.com> 9948M: Josh Poimboeuf <jpoimboe@redhat.com>
9949M: Peter Zijlstra <peterz@infradead.org>
9948S: Supported 9950S: Supported
9949F: tools/objtool/ 9951F: tools/objtool/
9950 9952
diff --git a/Makefile b/Makefile
index 79ad2bfa24b6..d9cf3a40eda9 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
2VERSION = 4 2VERSION = 4
3PATCHLEVEL = 16 3PATCHLEVEL = 16
4SUBLEVEL = 0 4SUBLEVEL = 0
5EXTRAVERSION = -rc1 5EXTRAVERSION = -rc2
6NAME = Fearless Coyote 6NAME = Fearless Coyote
7 7
8# *DOCUMENTATION* 8# *DOCUMENTATION*
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index be7bd19c87ec..eda8c5f629fc 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -20,7 +20,7 @@
20 20
21#define MPIDR_UP_BITMASK (0x1 << 30) 21#define MPIDR_UP_BITMASK (0x1 << 30)
22#define MPIDR_MT_BITMASK (0x1 << 24) 22#define MPIDR_MT_BITMASK (0x1 << 24)
23#define MPIDR_HWID_BITMASK 0xff00ffffff 23#define MPIDR_HWID_BITMASK 0xff00ffffffUL
24 24
25#define MPIDR_LEVEL_BITS_SHIFT 3 25#define MPIDR_LEVEL_BITS_SHIFT 3
26#define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT) 26#define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT)
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 1dca41bea16a..e73f68569624 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -22,7 +22,7 @@
22 22
23static inline pte_t huge_ptep_get(pte_t *ptep) 23static inline pte_t huge_ptep_get(pte_t *ptep)
24{ 24{
25 return *ptep; 25 return READ_ONCE(*ptep);
26} 26}
27 27
28 28
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 9679067a1574..7faed6e48b46 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -185,42 +185,42 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
185 return pmd; 185 return pmd;
186} 186}
187 187
188static inline void kvm_set_s2pte_readonly(pte_t *pte) 188static inline void kvm_set_s2pte_readonly(pte_t *ptep)
189{ 189{
190 pteval_t old_pteval, pteval; 190 pteval_t old_pteval, pteval;
191 191
192 pteval = READ_ONCE(pte_val(*pte)); 192 pteval = READ_ONCE(pte_val(*ptep));
193 do { 193 do {
194 old_pteval = pteval; 194 old_pteval = pteval;
195 pteval &= ~PTE_S2_RDWR; 195 pteval &= ~PTE_S2_RDWR;
196 pteval |= PTE_S2_RDONLY; 196 pteval |= PTE_S2_RDONLY;
197 pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval); 197 pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
198 } while (pteval != old_pteval); 198 } while (pteval != old_pteval);
199} 199}
200 200
201static inline bool kvm_s2pte_readonly(pte_t *pte) 201static inline bool kvm_s2pte_readonly(pte_t *ptep)
202{ 202{
203 return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY; 203 return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY;
204} 204}
205 205
206static inline bool kvm_s2pte_exec(pte_t *pte) 206static inline bool kvm_s2pte_exec(pte_t *ptep)
207{ 207{
208 return !(pte_val(*pte) & PTE_S2_XN); 208 return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN);
209} 209}
210 210
211static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) 211static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp)
212{ 212{
213 kvm_set_s2pte_readonly((pte_t *)pmd); 213 kvm_set_s2pte_readonly((pte_t *)pmdp);
214} 214}
215 215
216static inline bool kvm_s2pmd_readonly(pmd_t *pmd) 216static inline bool kvm_s2pmd_readonly(pmd_t *pmdp)
217{ 217{
218 return kvm_s2pte_readonly((pte_t *)pmd); 218 return kvm_s2pte_readonly((pte_t *)pmdp);
219} 219}
220 220
221static inline bool kvm_s2pmd_exec(pmd_t *pmd) 221static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
222{ 222{
223 return !(pmd_val(*pmd) & PMD_S2_XN); 223 return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
224} 224}
225 225
226static inline bool kvm_page_empty(void *ptr) 226static inline bool kvm_page_empty(void *ptr)
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 8d3331985d2e..39ec0b8a689e 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -141,13 +141,13 @@ static inline void cpu_install_idmap(void)
141 * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, 141 * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
142 * avoiding the possibility of conflicting TLB entries being allocated. 142 * avoiding the possibility of conflicting TLB entries being allocated.
143 */ 143 */
144static inline void cpu_replace_ttbr1(pgd_t *pgd) 144static inline void cpu_replace_ttbr1(pgd_t *pgdp)
145{ 145{
146 typedef void (ttbr_replace_func)(phys_addr_t); 146 typedef void (ttbr_replace_func)(phys_addr_t);
147 extern ttbr_replace_func idmap_cpu_replace_ttbr1; 147 extern ttbr_replace_func idmap_cpu_replace_ttbr1;
148 ttbr_replace_func *replace_phys; 148 ttbr_replace_func *replace_phys;
149 149
150 phys_addr_t pgd_phys = virt_to_phys(pgd); 150 phys_addr_t pgd_phys = virt_to_phys(pgdp);
151 151
152 replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); 152 replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
153 153
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index e9d9f1b006ef..2e05bcd944c8 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -36,23 +36,23 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
36 return (pmd_t *)__get_free_page(PGALLOC_GFP); 36 return (pmd_t *)__get_free_page(PGALLOC_GFP);
37} 37}
38 38
39static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) 39static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
40{ 40{
41 BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); 41 BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
42 free_page((unsigned long)pmd); 42 free_page((unsigned long)pmdp);
43} 43}
44 44
45static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) 45static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
46{ 46{
47 set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot)); 47 set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot));
48} 48}
49 49
50static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) 50static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
51{ 51{
52 __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE); 52 __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
53} 53}
54#else 54#else
55static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) 55static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
56{ 56{
57 BUILD_BUG(); 57 BUILD_BUG();
58} 58}
@@ -65,30 +65,30 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
65 return (pud_t *)__get_free_page(PGALLOC_GFP); 65 return (pud_t *)__get_free_page(PGALLOC_GFP);
66} 66}
67 67
68static inline void pud_free(struct mm_struct *mm, pud_t *pud) 68static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
69{ 69{
70 BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); 70 BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
71 free_page((unsigned long)pud); 71 free_page((unsigned long)pudp);
72} 72}
73 73
74static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) 74static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
75{ 75{
76 set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot)); 76 set_pgd(pgdp, __pgd(__phys_to_pgd_val(pudp) | prot));
77} 77}
78 78
79static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) 79static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp)
80{ 80{
81 __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE); 81 __pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE);
82} 82}
83#else 83#else
84static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) 84static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
85{ 85{
86 BUILD_BUG(); 86 BUILD_BUG();
87} 87}
88#endif /* CONFIG_PGTABLE_LEVELS > 3 */ 88#endif /* CONFIG_PGTABLE_LEVELS > 3 */
89 89
90extern pgd_t *pgd_alloc(struct mm_struct *mm); 90extern pgd_t *pgd_alloc(struct mm_struct *mm);
91extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); 91extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
92 92
93static inline pte_t * 93static inline pte_t *
94pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) 94pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
@@ -114,10 +114,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr)
114/* 114/*
115 * Free a PTE table. 115 * Free a PTE table.
116 */ 116 */
117static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) 117static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
118{ 118{
119 if (pte) 119 if (ptep)
120 free_page((unsigned long)pte); 120 free_page((unsigned long)ptep);
121} 121}
122 122
123static inline void pte_free(struct mm_struct *mm, pgtable_t pte) 123static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
@@ -126,10 +126,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
126 __free_page(pte); 126 __free_page(pte);
127} 127}
128 128
129static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte, 129static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
130 pmdval_t prot) 130 pmdval_t prot)
131{ 131{
132 set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot)); 132 set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot));
133} 133}
134 134
135/* 135/*
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 094374c82db0..7e2c27e63cd8 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -218,7 +218,7 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
218 218
219static inline void set_pte(pte_t *ptep, pte_t pte) 219static inline void set_pte(pte_t *ptep, pte_t pte)
220{ 220{
221 *ptep = pte; 221 WRITE_ONCE(*ptep, pte);
222 222
223 /* 223 /*
224 * Only if the new pte is valid and kernel, otherwise TLB maintenance 224 * Only if the new pte is valid and kernel, otherwise TLB maintenance
@@ -250,6 +250,8 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
250static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, 250static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
251 pte_t *ptep, pte_t pte) 251 pte_t *ptep, pte_t pte)
252{ 252{
253 pte_t old_pte;
254
253 if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) 255 if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
254 __sync_icache_dcache(pte, addr); 256 __sync_icache_dcache(pte, addr);
255 257
@@ -258,14 +260,15 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
258 * hardware updates of the pte (ptep_set_access_flags safely changes 260 * hardware updates of the pte (ptep_set_access_flags safely changes
259 * valid ptes without going through an invalid entry). 261 * valid ptes without going through an invalid entry).
260 */ 262 */
261 if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) && 263 old_pte = READ_ONCE(*ptep);
264 if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) &&
262 (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) { 265 (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
263 VM_WARN_ONCE(!pte_young(pte), 266 VM_WARN_ONCE(!pte_young(pte),
264 "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", 267 "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
265 __func__, pte_val(*ptep), pte_val(pte)); 268 __func__, pte_val(old_pte), pte_val(pte));
266 VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte), 269 VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
267 "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", 270 "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
268 __func__, pte_val(*ptep), pte_val(pte)); 271 __func__, pte_val(old_pte), pte_val(pte));
269 } 272 }
270 273
271 set_pte(ptep, pte); 274 set_pte(ptep, pte);
@@ -431,7 +434,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
431 434
432static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) 435static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
433{ 436{
434 *pmdp = pmd; 437 WRITE_ONCE(*pmdp, pmd);
435 dsb(ishst); 438 dsb(ishst);
436 isb(); 439 isb();
437} 440}
@@ -482,7 +485,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
482 485
483static inline void set_pud(pud_t *pudp, pud_t pud) 486static inline void set_pud(pud_t *pudp, pud_t pud)
484{ 487{
485 *pudp = pud; 488 WRITE_ONCE(*pudp, pud);
486 dsb(ishst); 489 dsb(ishst);
487 isb(); 490 isb();
488} 491}
@@ -500,7 +503,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
500/* Find an entry in the second-level page table. */ 503/* Find an entry in the second-level page table. */
501#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) 504#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
502 505
503#define pmd_offset_phys(dir, addr) (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t)) 506#define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
504#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr)))) 507#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
505 508
506#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr)) 509#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
@@ -535,7 +538,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
535 538
536static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) 539static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
537{ 540{
538 *pgdp = pgd; 541 WRITE_ONCE(*pgdp, pgd);
539 dsb(ishst); 542 dsb(ishst);
540} 543}
541 544
@@ -552,7 +555,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
552/* Find an entry in the first-level page table. */ 555/* Find an entry in the first-level page table. */
553#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) 556#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
554 557
555#define pud_offset_phys(dir, addr) (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t)) 558#define pud_offset_phys(dir, addr) (pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
556#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr)))) 559#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr))))
557 560
558#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) 561#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
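
A note on the accessors this series introduces: for word-sized types, READ_ONCE() and WRITE_ONCE() boil down to volatile accesses, which is what stops the compiler from tearing, re-reading, or register-caching a page-table entry that the hardware walker may be updating concurrently. A minimal userspace sketch of that reduction, assuming GNU C typeof (the kernel's real helpers in include/linux/compiler.h cover more sizes and cases):

    #include <stdint.h>
    #include <stdio.h>

    /* Volatile casts force exactly one load/store of the full word:
     * no tearing, no re-reads, no caching in a register. */
    #define READ_ONCE(x)     (*(const volatile typeof(x) *)&(x))
    #define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&(x) = (v))

    static uint64_t pte;    /* stands in for a live page-table entry */

    int main(void)
    {
            WRITE_ONCE(pte, 0x1234ULL);       /* single full-width store */
            uint64_t snap = READ_ONCE(pte);   /* single load, stable snapshot */
            printf("snapshot: 0x%llx\n", (unsigned long long)snap);
            return 0;
    }
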
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 07823595b7f0..52f15cd896e1 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -408,6 +408,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
408 }, 408 },
409 { 409 {
410 .capability = ARM64_HARDEN_BRANCH_PREDICTOR, 410 .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
411 MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
412 .enable = qcom_enable_link_stack_sanitization,
413 },
414 {
415 .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
416 MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
417 },
418 {
419 .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
411 MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), 420 MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
412 .enable = enable_smccc_arch_workaround_1, 421 .enable = enable_smccc_arch_workaround_1,
413 }, 422 },
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index f85ac58d08a3..a8bf1c892b90 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -90,7 +90,7 @@ static int __init set_permissions(pte_t *ptep, pgtable_t token,
90 unsigned long addr, void *data) 90 unsigned long addr, void *data)
91{ 91{
92 efi_memory_desc_t *md = data; 92 efi_memory_desc_t *md = data;
93 pte_t pte = *ptep; 93 pte_t pte = READ_ONCE(*ptep);
94 94
95 if (md->attribute & EFI_MEMORY_RO) 95 if (md->attribute & EFI_MEMORY_RO)
96 pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); 96 pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
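
The set_permissions() hunk above shows the snapshot-modify-publish pattern that recurs throughout the series: read the entry once into a local, adjust only the local copy, then publish it with a single store so a concurrent walker never observes a half-updated entry. A sketch under the same volatile-accessor assumption, with an invented permission bit:

    #include <stdint.h>

    #define READ_ONCE(x)     (*(const volatile typeof(x) *)&(x))
    #define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&(x) = (v))

    #define ATTR_RDONLY (1ULL << 7)   /* hypothetical read-only bit */

    /* Read once, modify the local snapshot, write back once. */
    static void make_readonly(uint64_t *entry)
    {
            uint64_t e = READ_ONCE(*entry);
            e |= ATTR_RDONLY;
            WRITE_ONCE(*entry, e);
    }

    int main(void)
    {
            uint64_t entry = 0x1000ULL;
            make_readonly(&entry);
            return (entry & ATTR_RDONLY) ? 0 : 1;
    }
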
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index f20cf7e99249..1ec5f28c39fc 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -202,10 +202,10 @@ static int create_safe_exec_page(void *src_start, size_t length,
202 gfp_t mask) 202 gfp_t mask)
203{ 203{
204 int rc = 0; 204 int rc = 0;
205 pgd_t *pgd; 205 pgd_t *pgdp;
206 pud_t *pud; 206 pud_t *pudp;
207 pmd_t *pmd; 207 pmd_t *pmdp;
208 pte_t *pte; 208 pte_t *ptep;
209 unsigned long dst = (unsigned long)allocator(mask); 209 unsigned long dst = (unsigned long)allocator(mask);
210 210
211 if (!dst) { 211 if (!dst) {
@@ -216,38 +216,38 @@ static int create_safe_exec_page(void *src_start, size_t length,
216 memcpy((void *)dst, src_start, length); 216 memcpy((void *)dst, src_start, length);
217 flush_icache_range(dst, dst + length); 217 flush_icache_range(dst, dst + length);
218 218
219 pgd = pgd_offset_raw(allocator(mask), dst_addr); 219 pgdp = pgd_offset_raw(allocator(mask), dst_addr);
220 if (pgd_none(*pgd)) { 220 if (pgd_none(READ_ONCE(*pgdp))) {
221 pud = allocator(mask); 221 pudp = allocator(mask);
222 if (!pud) { 222 if (!pudp) {
223 rc = -ENOMEM; 223 rc = -ENOMEM;
224 goto out; 224 goto out;
225 } 225 }
226 pgd_populate(&init_mm, pgd, pud); 226 pgd_populate(&init_mm, pgdp, pudp);
227 } 227 }
228 228
229 pud = pud_offset(pgd, dst_addr); 229 pudp = pud_offset(pgdp, dst_addr);
230 if (pud_none(*pud)) { 230 if (pud_none(READ_ONCE(*pudp))) {
231 pmd = allocator(mask); 231 pmdp = allocator(mask);
232 if (!pmd) { 232 if (!pmdp) {
233 rc = -ENOMEM; 233 rc = -ENOMEM;
234 goto out; 234 goto out;
235 } 235 }
236 pud_populate(&init_mm, pud, pmd); 236 pud_populate(&init_mm, pudp, pmdp);
237 } 237 }
238 238
239 pmd = pmd_offset(pud, dst_addr); 239 pmdp = pmd_offset(pudp, dst_addr);
240 if (pmd_none(*pmd)) { 240 if (pmd_none(READ_ONCE(*pmdp))) {
241 pte = allocator(mask); 241 ptep = allocator(mask);
242 if (!pte) { 242 if (!ptep) {
243 rc = -ENOMEM; 243 rc = -ENOMEM;
244 goto out; 244 goto out;
245 } 245 }
246 pmd_populate_kernel(&init_mm, pmd, pte); 246 pmd_populate_kernel(&init_mm, pmdp, ptep);
247 } 247 }
248 248
249 pte = pte_offset_kernel(pmd, dst_addr); 249 ptep = pte_offset_kernel(pmdp, dst_addr);
250 set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC)); 250 set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
251 251
252 /* 252 /*
253 * Load our new page tables. A strict BBM approach requires that we 253 * Load our new page tables. A strict BBM approach requires that we
@@ -263,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
263 */ 263 */
264 cpu_set_reserved_ttbr0(); 264 cpu_set_reserved_ttbr0();
265 local_flush_tlb_all(); 265 local_flush_tlb_all();
266 write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1); 266 write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
267 isb(); 267 isb();
268 268
269 *phys_dst_addr = virt_to_phys((void *)dst); 269 *phys_dst_addr = virt_to_phys((void *)dst);
@@ -320,9 +320,9 @@ int swsusp_arch_suspend(void)
320 return ret; 320 return ret;
321} 321}
322 322
323static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr) 323static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
324{ 324{
325 pte_t pte = *src_pte; 325 pte_t pte = READ_ONCE(*src_ptep);
326 326
327 if (pte_valid(pte)) { 327 if (pte_valid(pte)) {
328 /* 328 /*
@@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
330 * read only (code, rodata). Clear the RDONLY bit from 330 * read only (code, rodata). Clear the RDONLY bit from
331 * the temporary mappings we use during restore. 331 * the temporary mappings we use during restore.
332 */ 332 */
333 set_pte(dst_pte, pte_mkwrite(pte)); 333 set_pte(dst_ptep, pte_mkwrite(pte));
334 } else if (debug_pagealloc_enabled() && !pte_none(pte)) { 334 } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
335 /* 335 /*
336 * debug_pagealloc will remove the PTE_VALID bit if 336 * debug_pagealloc will remove the PTE_VALID bit if
@@ -343,112 +343,116 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
343 */ 343 */
344 BUG_ON(!pfn_valid(pte_pfn(pte))); 344 BUG_ON(!pfn_valid(pte_pfn(pte)));
345 345
346 set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte))); 346 set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
347 } 347 }
348} 348}
349 349
350static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, 350static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
351 unsigned long end) 351 unsigned long end)
352{ 352{
353 pte_t *src_pte; 353 pte_t *src_ptep;
354 pte_t *dst_pte; 354 pte_t *dst_ptep;
355 unsigned long addr = start; 355 unsigned long addr = start;
356 356
357 dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC); 357 dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
358 if (!dst_pte) 358 if (!dst_ptep)
359 return -ENOMEM; 359 return -ENOMEM;
360 pmd_populate_kernel(&init_mm, dst_pmd, dst_pte); 360 pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
361 dst_pte = pte_offset_kernel(dst_pmd, start); 361 dst_ptep = pte_offset_kernel(dst_pmdp, start);
362 362
363 src_pte = pte_offset_kernel(src_pmd, start); 363 src_ptep = pte_offset_kernel(src_pmdp, start);
364 do { 364 do {
365 _copy_pte(dst_pte, src_pte, addr); 365 _copy_pte(dst_ptep, src_ptep, addr);
366 } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); 366 } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
367 367
368 return 0; 368 return 0;
369} 369}
370 370
371static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start, 371static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
372 unsigned long end) 372 unsigned long end)
373{ 373{
374 pmd_t *src_pmd; 374 pmd_t *src_pmdp;
375 pmd_t *dst_pmd; 375 pmd_t *dst_pmdp;
376 unsigned long next; 376 unsigned long next;
377 unsigned long addr = start; 377 unsigned long addr = start;
378 378
379 if (pud_none(*dst_pud)) { 379 if (pud_none(READ_ONCE(*dst_pudp))) {
380 dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); 380 dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
381 if (!dst_pmd) 381 if (!dst_pmdp)
382 return -ENOMEM; 382 return -ENOMEM;
383 pud_populate(&init_mm, dst_pud, dst_pmd); 383 pud_populate(&init_mm, dst_pudp, dst_pmdp);
384 } 384 }
385 dst_pmd = pmd_offset(dst_pud, start); 385 dst_pmdp = pmd_offset(dst_pudp, start);
386 386
387 src_pmd = pmd_offset(src_pud, start); 387 src_pmdp = pmd_offset(src_pudp, start);
388 do { 388 do {
389 pmd_t pmd = READ_ONCE(*src_pmdp);
390
389 next = pmd_addr_end(addr, end); 391 next = pmd_addr_end(addr, end);
390 if (pmd_none(*src_pmd)) 392 if (pmd_none(pmd))
391 continue; 393 continue;
392 if (pmd_table(*src_pmd)) { 394 if (pmd_table(pmd)) {
393 if (copy_pte(dst_pmd, src_pmd, addr, next)) 395 if (copy_pte(dst_pmdp, src_pmdp, addr, next))
394 return -ENOMEM; 396 return -ENOMEM;
395 } else { 397 } else {
396 set_pmd(dst_pmd, 398 set_pmd(dst_pmdp,
397 __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY)); 399 __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
398 } 400 }
399 } while (dst_pmd++, src_pmd++, addr = next, addr != end); 401 } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
400 402
401 return 0; 403 return 0;
402} 404}
403 405
404static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start, 406static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
405 unsigned long end) 407 unsigned long end)
406{ 408{
407 pud_t *dst_pud; 409 pud_t *dst_pudp;
408 pud_t *src_pud; 410 pud_t *src_pudp;
409 unsigned long next; 411 unsigned long next;
410 unsigned long addr = start; 412 unsigned long addr = start;
411 413
412 if (pgd_none(*dst_pgd)) { 414 if (pgd_none(READ_ONCE(*dst_pgdp))) {
413 dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC); 415 dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
414 if (!dst_pud) 416 if (!dst_pudp)
415 return -ENOMEM; 417 return -ENOMEM;
416 pgd_populate(&init_mm, dst_pgd, dst_pud); 418 pgd_populate(&init_mm, dst_pgdp, dst_pudp);
417 } 419 }
418 dst_pud = pud_offset(dst_pgd, start); 420 dst_pudp = pud_offset(dst_pgdp, start);
419 421
420 src_pud = pud_offset(src_pgd, start); 422 src_pudp = pud_offset(src_pgdp, start);
421 do { 423 do {
424 pud_t pud = READ_ONCE(*src_pudp);
425
422 next = pud_addr_end(addr, end); 426 next = pud_addr_end(addr, end);
423 if (pud_none(*src_pud)) 427 if (pud_none(pud))
424 continue; 428 continue;
425 if (pud_table(*(src_pud))) { 429 if (pud_table(pud)) {
426 if (copy_pmd(dst_pud, src_pud, addr, next)) 430 if (copy_pmd(dst_pudp, src_pudp, addr, next))
427 return -ENOMEM; 431 return -ENOMEM;
428 } else { 432 } else {
429 set_pud(dst_pud, 433 set_pud(dst_pudp,
430 __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY)); 434 __pud(pud_val(pud) & ~PMD_SECT_RDONLY));
431 } 435 }
432 } while (dst_pud++, src_pud++, addr = next, addr != end); 436 } while (dst_pudp++, src_pudp++, addr = next, addr != end);
433 437
434 return 0; 438 return 0;
435} 439}
436 440
437static int copy_page_tables(pgd_t *dst_pgd, unsigned long start, 441static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
438 unsigned long end) 442 unsigned long end)
439{ 443{
440 unsigned long next; 444 unsigned long next;
441 unsigned long addr = start; 445 unsigned long addr = start;
442 pgd_t *src_pgd = pgd_offset_k(start); 446 pgd_t *src_pgdp = pgd_offset_k(start);
443 447
444 dst_pgd = pgd_offset_raw(dst_pgd, start); 448 dst_pgdp = pgd_offset_raw(dst_pgdp, start);
445 do { 449 do {
446 next = pgd_addr_end(addr, end); 450 next = pgd_addr_end(addr, end);
447 if (pgd_none(*src_pgd)) 451 if (pgd_none(READ_ONCE(*src_pgdp)))
448 continue; 452 continue;
449 if (copy_pud(dst_pgd, src_pgd, addr, next)) 453 if (copy_pud(dst_pgdp, src_pgdp, addr, next))
450 return -ENOMEM; 454 return -ENOMEM;
451 } while (dst_pgd++, src_pgd++, addr = next, addr != end); 455 } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
452 456
453 return 0; 457 return 0;
454} 458}
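
All the copy_*() walkers above now share one idiom: take a READ_ONCE() snapshot at the top of each loop iteration and make every decision on that local copy, so the pmd_none()/pmd_table() test and the later use can never see two different values of a live entry. A self-contained sketch of the idiom over a flat table; the "valid" bit is hypothetical:

    #include <stdint.h>
    #include <stddef.h>

    #define READ_ONCE(x) (*(const volatile typeof(x) *)&(x))

    #define ENTRY_VALID (1ULL << 0)   /* hypothetical "present" bit */

    /* One load per entry; both the test and the use see the same value. */
    static size_t count_valid(uint64_t *table, size_t nr)
    {
            size_t valid = 0;

            for (size_t i = 0; i < nr; i++) {
                    uint64_t e = READ_ONCE(table[i]);   /* snapshot */

                    if (!(e & ENTRY_VALID))
                            continue;
                    valid++;
            }
            return valid;
    }

    int main(void)
    {
            uint64_t tbl[4] = { ENTRY_VALID, 0, ENTRY_VALID, 0 };
            return count_valid(tbl, 4) == 2 ? 0 : 1;
    }
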
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 116252a8d3a5..870f4b1587f9 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -407,8 +407,10 @@ again:
407 u32 midr = read_cpuid_id(); 407 u32 midr = read_cpuid_id();
408 408
409 /* Apply BTAC predictors mitigation to all Falkor chips */ 409 /* Apply BTAC predictors mitigation to all Falkor chips */
410 if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1) 410 if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
411 ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {
411 __qcom_hyp_sanitize_btac_predictors(); 412 __qcom_hyp_sanitize_btac_predictors();
413 }
412 } 414 }
413 415
414 fp_enabled = __fpsimd_enabled(); 416 fp_enabled = __fpsimd_enabled();
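
The widened MIDR test above relies on MIDR_CPU_MODEL_MASK stripping the revision and variant fields before comparing, so every revision of an affected part matches. A sketch of that masking with purely illustrative constants (the real MIDR_* definitions live in arch/arm64/include/asm/cputype.h):

    #include <stdint.h>

    /* Illustrative values only: keep implementer/part bits, drop
     * variant/revision so any silicon revision of the part matches. */
    #define MODEL_MASK      0xff0ffff0u
    #define MODEL_FALKOR    0x51008000u
    #define MODEL_FALKOR_V1 0x51002010u

    static int needs_btac_workaround(uint32_t midr)
    {
            uint32_t model = midr & MODEL_MASK;

            return model == MODEL_FALKOR || model == MODEL_FALKOR_V1;
    }

    int main(void)
    {
            /* a later revision still matches: its low bits are masked off */
            return needs_btac_workaround(MODEL_FALKOR | 0x5) ? 0 : 1;
    }
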
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 7b60d62ac593..65dfc8571bf8 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -286,48 +286,52 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
286 286
287} 287}
288 288
289static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) 289static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start)
290{ 290{
291 pte_t *pte = pte_offset_kernel(pmd, 0UL); 291 pte_t *ptep = pte_offset_kernel(pmdp, 0UL);
292 unsigned long addr; 292 unsigned long addr;
293 unsigned i; 293 unsigned i;
294 294
295 for (i = 0; i < PTRS_PER_PTE; i++, pte++) { 295 for (i = 0; i < PTRS_PER_PTE; i++, ptep++) {
296 addr = start + i * PAGE_SIZE; 296 addr = start + i * PAGE_SIZE;
297 note_page(st, addr, 4, pte_val(*pte)); 297 note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));
298 } 298 }
299} 299}
300 300
301static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) 301static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start)
302{ 302{
303 pmd_t *pmd = pmd_offset(pud, 0UL); 303 pmd_t *pmdp = pmd_offset(pudp, 0UL);
304 unsigned long addr; 304 unsigned long addr;
305 unsigned i; 305 unsigned i;
306 306
307 for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { 307 for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) {
308 pmd_t pmd = READ_ONCE(*pmdp);
309
308 addr = start + i * PMD_SIZE; 310 addr = start + i * PMD_SIZE;
309 if (pmd_none(*pmd) || pmd_sect(*pmd)) { 311 if (pmd_none(pmd) || pmd_sect(pmd)) {
310 note_page(st, addr, 3, pmd_val(*pmd)); 312 note_page(st, addr, 3, pmd_val(pmd));
311 } else { 313 } else {
312 BUG_ON(pmd_bad(*pmd)); 314 BUG_ON(pmd_bad(pmd));
313 walk_pte(st, pmd, addr); 315 walk_pte(st, pmdp, addr);
314 } 316 }
315 } 317 }
316} 318}
317 319
318static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) 320static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start)
319{ 321{
320 pud_t *pud = pud_offset(pgd, 0UL); 322 pud_t *pudp = pud_offset(pgdp, 0UL);
321 unsigned long addr; 323 unsigned long addr;
322 unsigned i; 324 unsigned i;
323 325
324 for (i = 0; i < PTRS_PER_PUD; i++, pud++) { 326 for (i = 0; i < PTRS_PER_PUD; i++, pudp++) {
327 pud_t pud = READ_ONCE(*pudp);
328
325 addr = start + i * PUD_SIZE; 329 addr = start + i * PUD_SIZE;
326 if (pud_none(*pud) || pud_sect(*pud)) { 330 if (pud_none(pud) || pud_sect(pud)) {
327 note_page(st, addr, 2, pud_val(*pud)); 331 note_page(st, addr, 2, pud_val(pud));
328 } else { 332 } else {
329 BUG_ON(pud_bad(*pud)); 333 BUG_ON(pud_bad(pud));
330 walk_pmd(st, pud, addr); 334 walk_pmd(st, pudp, addr);
331 } 335 }
332 } 336 }
333} 337}
@@ -335,17 +339,19 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
335static void walk_pgd(struct pg_state *st, struct mm_struct *mm, 339static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
336 unsigned long start) 340 unsigned long start)
337{ 341{
338 pgd_t *pgd = pgd_offset(mm, 0UL); 342 pgd_t *pgdp = pgd_offset(mm, 0UL);
339 unsigned i; 343 unsigned i;
340 unsigned long addr; 344 unsigned long addr;
341 345
342 for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { 346 for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) {
347 pgd_t pgd = READ_ONCE(*pgdp);
348
343 addr = start + i * PGDIR_SIZE; 349 addr = start + i * PGDIR_SIZE;
344 if (pgd_none(*pgd)) { 350 if (pgd_none(pgd)) {
345 note_page(st, addr, 1, pgd_val(*pgd)); 351 note_page(st, addr, 1, pgd_val(pgd));
346 } else { 352 } else {
347 BUG_ON(pgd_bad(*pgd)); 353 BUG_ON(pgd_bad(pgd));
348 walk_pud(st, pgd, addr); 354 walk_pud(st, pgdp, addr);
349 } 355 }
350 } 356 }
351} 357}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index f76bb2c3c943..bff11553eb05 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -130,7 +130,8 @@ static void mem_abort_decode(unsigned int esr)
130void show_pte(unsigned long addr) 130void show_pte(unsigned long addr)
131{ 131{
132 struct mm_struct *mm; 132 struct mm_struct *mm;
133 pgd_t *pgd; 133 pgd_t *pgdp;
134 pgd_t pgd;
134 135
135 if (addr < TASK_SIZE) { 136 if (addr < TASK_SIZE) {
136 /* TTBR0 */ 137 /* TTBR0 */
@@ -149,33 +150,37 @@ void show_pte(unsigned long addr)
149 return; 150 return;
150 } 151 }
151 152
152 pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n", 153 pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
153 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, 154 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
154 VA_BITS, mm->pgd); 155 VA_BITS, mm->pgd);
155 pgd = pgd_offset(mm, addr); 156 pgdp = pgd_offset(mm, addr);
156 pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd)); 157 pgd = READ_ONCE(*pgdp);
158 pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
157 159
158 do { 160 do {
159 pud_t *pud; 161 pud_t *pudp, pud;
160 pmd_t *pmd; 162 pmd_t *pmdp, pmd;
161 pte_t *pte; 163 pte_t *ptep, pte;
162 164
163 if (pgd_none(*pgd) || pgd_bad(*pgd)) 165 if (pgd_none(pgd) || pgd_bad(pgd))
164 break; 166 break;
165 167
166 pud = pud_offset(pgd, addr); 168 pudp = pud_offset(pgdp, addr);
167 pr_cont(", *pud=%016llx", pud_val(*pud)); 169 pud = READ_ONCE(*pudp);
168 if (pud_none(*pud) || pud_bad(*pud)) 170 pr_cont(", pud=%016llx", pud_val(pud));
171 if (pud_none(pud) || pud_bad(pud))
169 break; 172 break;
170 173
171 pmd = pmd_offset(pud, addr); 174 pmdp = pmd_offset(pudp, addr);
172 pr_cont(", *pmd=%016llx", pmd_val(*pmd)); 175 pmd = READ_ONCE(*pmdp);
173 if (pmd_none(*pmd) || pmd_bad(*pmd)) 176 pr_cont(", pmd=%016llx", pmd_val(pmd));
177 if (pmd_none(pmd) || pmd_bad(pmd))
174 break; 178 break;
175 179
176 pte = pte_offset_map(pmd, addr); 180 ptep = pte_offset_map(pmdp, addr);
177 pr_cont(", *pte=%016llx", pte_val(*pte)); 181 pte = READ_ONCE(*ptep);
178 pte_unmap(pte); 182 pr_cont(", pte=%016llx", pte_val(pte));
183 pte_unmap(ptep);
179 } while(0); 184 } while(0);
180 185
181 pr_cont("\n"); 186 pr_cont("\n");
@@ -196,8 +201,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
196 pte_t entry, int dirty) 201 pte_t entry, int dirty)
197{ 202{
198 pteval_t old_pteval, pteval; 203 pteval_t old_pteval, pteval;
204 pte_t pte = READ_ONCE(*ptep);
199 205
200 if (pte_same(*ptep, entry)) 206 if (pte_same(pte, entry))
201 return 0; 207 return 0;
202 208
203 /* only preserve the access flags and write permission */ 209 /* only preserve the access flags and write permission */
@@ -210,7 +216,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
210 * (calculated as: a & b == ~(~a | ~b)). 216 * (calculated as: a & b == ~(~a | ~b)).
211 */ 217 */
212 pte_val(entry) ^= PTE_RDONLY; 218 pte_val(entry) ^= PTE_RDONLY;
213 pteval = READ_ONCE(pte_val(*ptep)); 219 pteval = pte_val(pte);
214 do { 220 do {
215 old_pteval = pteval; 221 old_pteval = pteval;
216 pteval ^= PTE_RDONLY; 222 pteval ^= PTE_RDONLY;
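
The do/while that opens here (its cmpxchg tail lies beyond the hunk) is a classic compare-and-exchange retry loop: compute the new value from a snapshot and install it only if the entry is still unchanged, otherwise reload and retry, so concurrent hardware updates of the access/dirty bits are never lost. A C11 sketch of that retry shape with an invented software bit:

    #include <stdatomic.h>
    #include <stdint.h>

    #define ATTR_DIRTY (1ULL << 55)   /* hypothetical dirty bit */

    /* Retry until our update lands on an unchanged entry; on failure
     * atomic_compare_exchange_weak reloads 'old' with the current value. */
    static void set_dirty(_Atomic uint64_t *entry)
    {
            uint64_t old = atomic_load(entry);
            uint64_t new;

            do {
                    new = old | ATTR_DIRTY;
            } while (!atomic_compare_exchange_weak(entry, &old, new));
    }

    int main(void)
    {
            _Atomic uint64_t entry = 0x1000ULL;
            set_dirty(&entry);
            return (atomic_load(&entry) & ATTR_DIRTY) ? 0 : 1;
    }
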
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 6cb0fa92a651..ecc6818191df 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -54,14 +54,14 @@ static inline pgprot_t pte_pgprot(pte_t pte)
54static int find_num_contig(struct mm_struct *mm, unsigned long addr, 54static int find_num_contig(struct mm_struct *mm, unsigned long addr,
55 pte_t *ptep, size_t *pgsize) 55 pte_t *ptep, size_t *pgsize)
56{ 56{
57 pgd_t *pgd = pgd_offset(mm, addr); 57 pgd_t *pgdp = pgd_offset(mm, addr);
58 pud_t *pud; 58 pud_t *pudp;
59 pmd_t *pmd; 59 pmd_t *pmdp;
60 60
61 *pgsize = PAGE_SIZE; 61 *pgsize = PAGE_SIZE;
62 pud = pud_offset(pgd, addr); 62 pudp = pud_offset(pgdp, addr);
63 pmd = pmd_offset(pud, addr); 63 pmdp = pmd_offset(pudp, addr);
64 if ((pte_t *)pmd == ptep) { 64 if ((pte_t *)pmdp == ptep) {
65 *pgsize = PMD_SIZE; 65 *pgsize = PMD_SIZE;
66 return CONT_PMDS; 66 return CONT_PMDS;
67 } 67 }
@@ -181,11 +181,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
181 181
182 clear_flush(mm, addr, ptep, pgsize, ncontig); 182 clear_flush(mm, addr, ptep, pgsize, ncontig);
183 183
184 for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) { 184 for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
185 pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
186 pte_val(pfn_pte(pfn, hugeprot)));
187 set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); 185 set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
188 }
189} 186}
190 187
191void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, 188void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -203,20 +200,20 @@ void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
203pte_t *huge_pte_alloc(struct mm_struct *mm, 200pte_t *huge_pte_alloc(struct mm_struct *mm,
204 unsigned long addr, unsigned long sz) 201 unsigned long addr, unsigned long sz)
205{ 202{
206 pgd_t *pgd; 203 pgd_t *pgdp;
207 pud_t *pud; 204 pud_t *pudp;
208 pte_t *pte = NULL; 205 pmd_t *pmdp;
209 206 pte_t *ptep = NULL;
210 pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz); 207
211 pgd = pgd_offset(mm, addr); 208 pgdp = pgd_offset(mm, addr);
212 pud = pud_alloc(mm, pgd, addr); 209 pudp = pud_alloc(mm, pgdp, addr);
213 if (!pud) 210 if (!pudp)
214 return NULL; 211 return NULL;
215 212
216 if (sz == PUD_SIZE) { 213 if (sz == PUD_SIZE) {
217 pte = (pte_t *)pud; 214 ptep = (pte_t *)pudp;
218 } else if (sz == (PAGE_SIZE * CONT_PTES)) { 215 } else if (sz == (PAGE_SIZE * CONT_PTES)) {
219 pmd_t *pmd = pmd_alloc(mm, pud, addr); 216 pmdp = pmd_alloc(mm, pudp, addr);
220 217
221 WARN_ON(addr & (sz - 1)); 218 WARN_ON(addr & (sz - 1));
222 /* 219 /*
@@ -226,60 +223,55 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
226 * will be no pte_unmap() to correspond with this 223 * will be no pte_unmap() to correspond with this
227 * pte_alloc_map(). 224 * pte_alloc_map().
228 */ 225 */
229 pte = pte_alloc_map(mm, pmd, addr); 226 ptep = pte_alloc_map(mm, pmdp, addr);
230 } else if (sz == PMD_SIZE) { 227 } else if (sz == PMD_SIZE) {
231 if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && 228 if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
232 pud_none(*pud)) 229 pud_none(READ_ONCE(*pudp)))
233 pte = huge_pmd_share(mm, addr, pud); 230 ptep = huge_pmd_share(mm, addr, pudp);
234 else 231 else
235 pte = (pte_t *)pmd_alloc(mm, pud, addr); 232 ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
236 } else if (sz == (PMD_SIZE * CONT_PMDS)) { 233 } else if (sz == (PMD_SIZE * CONT_PMDS)) {
237 pmd_t *pmd; 234 pmdp = pmd_alloc(mm, pudp, addr);
238
239 pmd = pmd_alloc(mm, pud, addr);
240 WARN_ON(addr & (sz - 1)); 235 WARN_ON(addr & (sz - 1));
241 return (pte_t *)pmd; 236 return (pte_t *)pmdp;
242 } 237 }
243 238
244 pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr, 239 return ptep;
245 sz, pte, pte_val(*pte));
246 return pte;
247} 240}
248 241
249pte_t *huge_pte_offset(struct mm_struct *mm, 242pte_t *huge_pte_offset(struct mm_struct *mm,
250 unsigned long addr, unsigned long sz) 243 unsigned long addr, unsigned long sz)
251{ 244{
252 pgd_t *pgd; 245 pgd_t *pgdp;
253 pud_t *pud; 246 pud_t *pudp, pud;
254 pmd_t *pmd; 247 pmd_t *pmdp, pmd;
255 248
256 pgd = pgd_offset(mm, addr); 249 pgdp = pgd_offset(mm, addr);
257 pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); 250 if (!pgd_present(READ_ONCE(*pgdp)))
258 if (!pgd_present(*pgd))
259 return NULL; 251 return NULL;
260 252
261 pud = pud_offset(pgd, addr); 253 pudp = pud_offset(pgdp, addr);
262 if (sz != PUD_SIZE && pud_none(*pud)) 254 pud = READ_ONCE(*pudp);
255 if (sz != PUD_SIZE && pud_none(pud))
263 return NULL; 256 return NULL;
264 /* hugepage or swap? */ 257 /* hugepage or swap? */
265 if (pud_huge(*pud) || !pud_present(*pud)) 258 if (pud_huge(pud) || !pud_present(pud))
266 return (pte_t *)pud; 259 return (pte_t *)pudp;
267 /* table; check the next level */ 260 /* table; check the next level */
268 261
269 if (sz == CONT_PMD_SIZE) 262 if (sz == CONT_PMD_SIZE)
270 addr &= CONT_PMD_MASK; 263 addr &= CONT_PMD_MASK;
271 264
272 pmd = pmd_offset(pud, addr); 265 pmdp = pmd_offset(pudp, addr);
266 pmd = READ_ONCE(*pmdp);
273 if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) && 267 if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
274 pmd_none(*pmd)) 268 pmd_none(pmd))
275 return NULL; 269 return NULL;
276 if (pmd_huge(*pmd) || !pmd_present(*pmd)) 270 if (pmd_huge(pmd) || !pmd_present(pmd))
277 return (pte_t *)pmd; 271 return (pte_t *)pmdp;
278 272
279 if (sz == CONT_PTE_SIZE) { 273 if (sz == CONT_PTE_SIZE)
280 pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK)); 274 return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
281 return pte;
282 }
283 275
284 return NULL; 276 return NULL;
285} 277}
@@ -367,7 +359,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
367 size_t pgsize; 359 size_t pgsize;
368 pte_t pte; 360 pte_t pte;
369 361
370 if (!pte_cont(*ptep)) { 362 if (!pte_cont(READ_ONCE(*ptep))) {
371 ptep_set_wrprotect(mm, addr, ptep); 363 ptep_set_wrprotect(mm, addr, ptep);
372 return; 364 return;
373 } 365 }
@@ -391,7 +383,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
391 size_t pgsize; 383 size_t pgsize;
392 int ncontig; 384 int ncontig;
393 385
394 if (!pte_cont(*ptep)) { 386 if (!pte_cont(READ_ONCE(*ptep))) {
395 ptep_clear_flush(vma, addr, ptep); 387 ptep_clear_flush(vma, addr, ptep);
396 return; 388 return;
397 } 389 }
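
For context on the loops above: a contiguous hugepage is ncontig consecutive entries carrying identical attributes, each pointing one step of dpfn page frames further on, so set_huge_pte_at() writes every slot with its own incremented frame number. A sketch of that range-write shape; the entry layout is invented for illustration:

    #include <stdint.h>
    #include <stddef.h>

    #define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&(x) = (v))

    /* Write ncontig consecutive entries, stepping the frame number. */
    static void set_cont_range(uint64_t *ptep, size_t ncontig,
                               uint64_t pfn, uint64_t dpfn, uint64_t prot)
    {
            for (size_t i = 0; i < ncontig; i++, ptep++, pfn += dpfn)
                    WRITE_ONCE(*ptep, (pfn << 12) | prot);   /* invented layout */
    }

    int main(void)
    {
            uint64_t ptes[16] = { 0 };

            /* 16 consecutive 4K PTEs covering one 64K "cont" range */
            set_cont_range(ptes, 16, 0x80000, 1, 0x3);
            return ptes[15] == ((0x8000fULL << 12) | 0x3) ? 0 : 1;
    }
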
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 6e02e6fb4c7b..dabfc1ecda3d 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -44,92 +44,92 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node)
44 return __pa(p); 44 return __pa(p);
45} 45}
46 46
47static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node, 47static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node,
48 bool early) 48 bool early)
49{ 49{
50 if (pmd_none(*pmd)) { 50 if (pmd_none(READ_ONCE(*pmdp))) {
51 phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte) 51 phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte)
52 : kasan_alloc_zeroed_page(node); 52 : kasan_alloc_zeroed_page(node);
53 __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); 53 __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
54 } 54 }
55 55
56 return early ? pte_offset_kimg(pmd, addr) 56 return early ? pte_offset_kimg(pmdp, addr)
57 : pte_offset_kernel(pmd, addr); 57 : pte_offset_kernel(pmdp, addr);
58} 58}
59 59
60static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node, 60static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node,
61 bool early) 61 bool early)
62{ 62{
63 if (pud_none(*pud)) { 63 if (pud_none(READ_ONCE(*pudp))) {
64 phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd) 64 phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd)
65 : kasan_alloc_zeroed_page(node); 65 : kasan_alloc_zeroed_page(node);
66 __pud_populate(pud, pmd_phys, PMD_TYPE_TABLE); 66 __pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE);
67 } 67 }
68 68
69 return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr); 69 return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr);
70} 70}
71 71
72static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node, 72static pud_t *__init kasan_pud_offset(pgd_t *pgdp, unsigned long addr, int node,
73 bool early) 73 bool early)
74{ 74{
75 if (pgd_none(*pgd)) { 75 if (pgd_none(READ_ONCE(*pgdp))) {
76 phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud) 76 phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud)
77 : kasan_alloc_zeroed_page(node); 77 : kasan_alloc_zeroed_page(node);
78 __pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE); 78 __pgd_populate(pgdp, pud_phys, PMD_TYPE_TABLE);
79 } 79 }
80 80
81 return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr); 81 return early ? pud_offset_kimg(pgdp, addr) : pud_offset(pgdp, addr);
82} 82}
83 83
84static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr, 84static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
85 unsigned long end, int node, bool early) 85 unsigned long end, int node, bool early)
86{ 86{
87 unsigned long next; 87 unsigned long next;
88 pte_t *pte = kasan_pte_offset(pmd, addr, node, early); 88 pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
89 89
90 do { 90 do {
91 phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page) 91 phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page)
92 : kasan_alloc_zeroed_page(node); 92 : kasan_alloc_zeroed_page(node);
93 next = addr + PAGE_SIZE; 93 next = addr + PAGE_SIZE;
94 set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); 94 set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
95 } while (pte++, addr = next, addr != end && pte_none(*pte)); 95 } while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep)));
96} 96}
97 97
98static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr, 98static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
99 unsigned long end, int node, bool early) 99 unsigned long end, int node, bool early)
100{ 100{
101 unsigned long next; 101 unsigned long next;
102 pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early); 102 pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
103 103
104 do { 104 do {
105 next = pmd_addr_end(addr, end); 105 next = pmd_addr_end(addr, end);
106 kasan_pte_populate(pmd, addr, next, node, early); 106 kasan_pte_populate(pmdp, addr, next, node, early);
107 } while (pmd++, addr = next, addr != end && pmd_none(*pmd)); 107 } while (pmdp++, addr = next, addr != end && pmd_none(READ_ONCE(*pmdp)));
108} 108}
109 109
110static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr, 110static void __init kasan_pud_populate(pgd_t *pgdp, unsigned long addr,
111 unsigned long end, int node, bool early) 111 unsigned long end, int node, bool early)
112{ 112{
113 unsigned long next; 113 unsigned long next;
114 pud_t *pud = kasan_pud_offset(pgd, addr, node, early); 114 pud_t *pudp = kasan_pud_offset(pgdp, addr, node, early);
115 115
116 do { 116 do {
117 next = pud_addr_end(addr, end); 117 next = pud_addr_end(addr, end);
118 kasan_pmd_populate(pud, addr, next, node, early); 118 kasan_pmd_populate(pudp, addr, next, node, early);
119 } while (pud++, addr = next, addr != end && pud_none(*pud)); 119 } while (pudp++, addr = next, addr != end && pud_none(READ_ONCE(*pudp)));
120} 120}
121 121
122static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, 122static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
123 int node, bool early) 123 int node, bool early)
124{ 124{
125 unsigned long next; 125 unsigned long next;
126 pgd_t *pgd; 126 pgd_t *pgdp;
127 127
128 pgd = pgd_offset_k(addr); 128 pgdp = pgd_offset_k(addr);
129 do { 129 do {
130 next = pgd_addr_end(addr, end); 130 next = pgd_addr_end(addr, end);
131 kasan_pud_populate(pgd, addr, next, node, early); 131 kasan_pud_populate(pgdp, addr, next, node, early);
132 } while (pgd++, addr = next, addr != end); 132 } while (pgdp++, addr = next, addr != end);
133} 133}
134 134
135/* The early shadow maps everything to a single page of zeroes */ 135/* The early shadow maps everything to a single page of zeroes */
@@ -155,14 +155,14 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,
155 */ 155 */
156void __init kasan_copy_shadow(pgd_t *pgdir) 156void __init kasan_copy_shadow(pgd_t *pgdir)
157{ 157{
158 pgd_t *pgd, *pgd_new, *pgd_end; 158 pgd_t *pgdp, *pgdp_new, *pgdp_end;
159 159
160 pgd = pgd_offset_k(KASAN_SHADOW_START); 160 pgdp = pgd_offset_k(KASAN_SHADOW_START);
161 pgd_end = pgd_offset_k(KASAN_SHADOW_END); 161 pgdp_end = pgd_offset_k(KASAN_SHADOW_END);
162 pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START); 162 pgdp_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
163 do { 163 do {
164 set_pgd(pgd_new, *pgd); 164 set_pgd(pgdp_new, READ_ONCE(*pgdp));
165 } while (pgd++, pgd_new++, pgd != pgd_end); 165 } while (pgdp++, pgdp_new++, pgdp != pgdp_end);
166} 166}
167 167
168static void __init clear_pgds(unsigned long start, 168static void __init clear_pgds(unsigned long start,
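
The kasan_p*_populate() loops above share a distinctive termination rule: keep filling forward, but only while the next slot is still empty, so the walk stops the moment it reaches entries populated by an earlier pass. A sketch of that loop shape; zero stands for "none" and the layout is illustrative:

    #include <stdint.h>
    #include <stddef.h>

    #define READ_ONCE(x) (*(const volatile typeof(x) *)&(x))

    /* Fill entries until the end or until the next one is occupied. */
    static size_t populate_while_empty(uint64_t *table, size_t nr, uint64_t val)
    {
            size_t i = 0;

            do {
                    table[i] = val;
            } while (++i < nr && READ_ONCE(table[i]) == 0);

            return i;   /* entries written */
    }

    int main(void)
    {
            uint64_t tbl[8] = { 0, 0, 0, 0x1, 0, 0, 0, 0 };

            /* writes slots 0-2, then stops: slot 3 is already set */
            return populate_while_empty(tbl, 8, 0x2) == 3 ? 0 : 1;
    }
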
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 4694cda823c9..3161b853f29e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -125,45 +125,48 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
125 return ((old ^ new) & ~mask) == 0; 125 return ((old ^ new) & ~mask) == 0;
126} 126}
127 127
128static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, 128static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
129 phys_addr_t phys, pgprot_t prot) 129 phys_addr_t phys, pgprot_t prot)
130{ 130{
131 pte_t *pte; 131 pte_t *ptep;
132 132
133 pte = pte_set_fixmap_offset(pmd, addr); 133 ptep = pte_set_fixmap_offset(pmdp, addr);
134 do { 134 do {
135 pte_t old_pte = *pte; 135 pte_t old_pte = READ_ONCE(*ptep);
136 136
137 set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot)); 137 set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
138 138
139 /* 139 /*
140 * After the PTE entry has been populated once, we 140 * After the PTE entry has been populated once, we
141 * only allow updates to the permission attributes. 141 * only allow updates to the permission attributes.
142 */ 142 */
143 BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte))); 143 BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
144 READ_ONCE(pte_val(*ptep))));
144 145
145 phys += PAGE_SIZE; 146 phys += PAGE_SIZE;
146 } while (pte++, addr += PAGE_SIZE, addr != end); 147 } while (ptep++, addr += PAGE_SIZE, addr != end);
147 148
148 pte_clear_fixmap(); 149 pte_clear_fixmap();
149} 150}
150 151
151static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr, 152static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
152 unsigned long end, phys_addr_t phys, 153 unsigned long end, phys_addr_t phys,
153 pgprot_t prot, 154 pgprot_t prot,
154 phys_addr_t (*pgtable_alloc)(void), 155 phys_addr_t (*pgtable_alloc)(void),
155 int flags) 156 int flags)
156{ 157{
157 unsigned long next; 158 unsigned long next;
159 pmd_t pmd = READ_ONCE(*pmdp);
158 160
159 BUG_ON(pmd_sect(*pmd)); 161 BUG_ON(pmd_sect(pmd));
160 if (pmd_none(*pmd)) { 162 if (pmd_none(pmd)) {
161 phys_addr_t pte_phys; 163 phys_addr_t pte_phys;
162 BUG_ON(!pgtable_alloc); 164 BUG_ON(!pgtable_alloc);
163 pte_phys = pgtable_alloc(); 165 pte_phys = pgtable_alloc();
164 __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); 166 __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
167 pmd = READ_ONCE(*pmdp);
165 } 168 }
166 BUG_ON(pmd_bad(*pmd)); 169 BUG_ON(pmd_bad(pmd));
167 170
168 do { 171 do {
169 pgprot_t __prot = prot; 172 pgprot_t __prot = prot;
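
The BUG_ON checks threaded through init_pte() above (and the pmd/pud walkers below) rest on pgattr_change_is_safe(): once an entry is live, old and new may differ only in a small mask of permission attributes, which XOR makes easy to test. A sketch with illustrative bit positions:

    #include <stdint.h>

    /* Invented attribute mask: the bits a live entry may legally change. */
    #define ATTR_PERM_MASK ((1ULL << 6) | (1ULL << 7) | (1ULL << 53) | (1ULL << 54))

    static int change_is_safe(uint64_t old, uint64_t new)
    {
            /* creating or tearing down an entry is always allowed */
            if (old == 0 || new == 0)
                    return 1;

            /* XOR exposes changed bits; only masked ones are tolerated */
            return ((old ^ new) & ~ATTR_PERM_MASK) == 0;
    }

    int main(void)
    {
            uint64_t e = 0x40000000000701ULL;

            return (change_is_safe(e, e | (1ULL << 7)) &&    /* perm bit: ok */
                    !change_is_safe(e, e ^ (1ULL << 12)))    /* output addr: not ok */
                   ? 0 : 1;
    }
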
@@ -175,67 +178,69 @@ static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
175 (flags & NO_CONT_MAPPINGS) == 0) 178 (flags & NO_CONT_MAPPINGS) == 0)
176 __prot = __pgprot(pgprot_val(prot) | PTE_CONT); 179 __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
177 180
178 init_pte(pmd, addr, next, phys, __prot); 181 init_pte(pmdp, addr, next, phys, __prot);
179 182
180 phys += next - addr; 183 phys += next - addr;
181 } while (addr = next, addr != end); 184 } while (addr = next, addr != end);
182} 185}
183 186
184static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end, 187static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
185 phys_addr_t phys, pgprot_t prot, 188 phys_addr_t phys, pgprot_t prot,
186 phys_addr_t (*pgtable_alloc)(void), int flags) 189 phys_addr_t (*pgtable_alloc)(void), int flags)
187{ 190{
188 unsigned long next; 191 unsigned long next;
189 pmd_t *pmd; 192 pmd_t *pmdp;
190 193
191 pmd = pmd_set_fixmap_offset(pud, addr); 194 pmdp = pmd_set_fixmap_offset(pudp, addr);
192 do { 195 do {
193 pmd_t old_pmd = *pmd; 196 pmd_t old_pmd = READ_ONCE(*pmdp);
194 197
195 next = pmd_addr_end(addr, end); 198 next = pmd_addr_end(addr, end);
196 199
197 /* try section mapping first */ 200 /* try section mapping first */
198 if (((addr | next | phys) & ~SECTION_MASK) == 0 && 201 if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
199 (flags & NO_BLOCK_MAPPINGS) == 0) { 202 (flags & NO_BLOCK_MAPPINGS) == 0) {
200 pmd_set_huge(pmd, phys, prot); 203 pmd_set_huge(pmdp, phys, prot);
201 204
202 /* 205 /*
203 * After the PMD entry has been populated once, we 206 * After the PMD entry has been populated once, we
204 * only allow updates to the permission attributes. 207 * only allow updates to the permission attributes.
205 */ 208 */
206 BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), 209 BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
207 pmd_val(*pmd))); 210 READ_ONCE(pmd_val(*pmdp))));
208 } else { 211 } else {
209 alloc_init_cont_pte(pmd, addr, next, phys, prot, 212 alloc_init_cont_pte(pmdp, addr, next, phys, prot,
210 pgtable_alloc, flags); 213 pgtable_alloc, flags);
211 214
212 BUG_ON(pmd_val(old_pmd) != 0 && 215 BUG_ON(pmd_val(old_pmd) != 0 &&
213 pmd_val(old_pmd) != pmd_val(*pmd)); 216 pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
214 } 217 }
215 phys += next - addr; 218 phys += next - addr;
216 } while (pmd++, addr = next, addr != end); 219 } while (pmdp++, addr = next, addr != end);
217 220
218 pmd_clear_fixmap(); 221 pmd_clear_fixmap();
219} 222}
220 223
221static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr, 224static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
222 unsigned long end, phys_addr_t phys, 225 unsigned long end, phys_addr_t phys,
223 pgprot_t prot, 226 pgprot_t prot,
224 phys_addr_t (*pgtable_alloc)(void), int flags) 227 phys_addr_t (*pgtable_alloc)(void), int flags)
225{ 228{
226 unsigned long next; 229 unsigned long next;
230 pud_t pud = READ_ONCE(*pudp);
227 231
228 /* 232 /*
229 * Check for initial section mappings in the pgd/pud. 233 * Check for initial section mappings in the pgd/pud.
230 */ 234 */
231 BUG_ON(pud_sect(*pud)); 235 BUG_ON(pud_sect(pud));
232 if (pud_none(*pud)) { 236 if (pud_none(pud)) {
233 phys_addr_t pmd_phys; 237 phys_addr_t pmd_phys;
234 BUG_ON(!pgtable_alloc); 238 BUG_ON(!pgtable_alloc);
235 pmd_phys = pgtable_alloc(); 239 pmd_phys = pgtable_alloc();
236 __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); 240 __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
241 pud = READ_ONCE(*pudp);
237 } 242 }
238 BUG_ON(pud_bad(*pud)); 243 BUG_ON(pud_bad(pud));
239 244
240 do { 245 do {
241 pgprot_t __prot = prot; 246 pgprot_t __prot = prot;
@@ -247,7 +252,7 @@ static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
247 (flags & NO_CONT_MAPPINGS) == 0) 252 (flags & NO_CONT_MAPPINGS) == 0)
248 __prot = __pgprot(pgprot_val(prot) | PTE_CONT); 253 __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
249 254
250 init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags); 255 init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
251 256
252 phys += next - addr; 257 phys += next - addr;
253 } while (addr = next, addr != end); 258 } while (addr = next, addr != end);
@@ -265,25 +270,27 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
265 return true; 270 return true;
266} 271}
267 272
268static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, 273static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
269 phys_addr_t phys, pgprot_t prot, 274 phys_addr_t phys, pgprot_t prot,
270 phys_addr_t (*pgtable_alloc)(void), 275 phys_addr_t (*pgtable_alloc)(void),
271 int flags) 276 int flags)
272{ 277{
273 pud_t *pud;
274 unsigned long next; 278 unsigned long next;
279 pud_t *pudp;
280 pgd_t pgd = READ_ONCE(*pgdp);
275 281
276 if (pgd_none(*pgd)) { 282 if (pgd_none(pgd)) {
277 phys_addr_t pud_phys; 283 phys_addr_t pud_phys;
278 BUG_ON(!pgtable_alloc); 284 BUG_ON(!pgtable_alloc);
279 pud_phys = pgtable_alloc(); 285 pud_phys = pgtable_alloc();
280 __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE); 286 __pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
287 pgd = READ_ONCE(*pgdp);
281 } 288 }
282 BUG_ON(pgd_bad(*pgd)); 289 BUG_ON(pgd_bad(pgd));
283 290
284 pud = pud_set_fixmap_offset(pgd, addr); 291 pudp = pud_set_fixmap_offset(pgdp, addr);
285 do { 292 do {
286 pud_t old_pud = *pud; 293 pud_t old_pud = READ_ONCE(*pudp);
287 294
288 next = pud_addr_end(addr, end); 295 next = pud_addr_end(addr, end);
289 296
@@ -292,23 +299,23 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
292 */ 299 */
293 if (use_1G_block(addr, next, phys) && 300 if (use_1G_block(addr, next, phys) &&
294 (flags & NO_BLOCK_MAPPINGS) == 0) { 301 (flags & NO_BLOCK_MAPPINGS) == 0) {
295 pud_set_huge(pud, phys, prot); 302 pud_set_huge(pudp, phys, prot);
296 303
297 /* 304 /*
298 * After the PUD entry has been populated once, we 305 * After the PUD entry has been populated once, we
299 * only allow updates to the permission attributes. 306 * only allow updates to the permission attributes.
300 */ 307 */
301 BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), 308 BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
302 pud_val(*pud))); 309 READ_ONCE(pud_val(*pudp))));
303 } else { 310 } else {
304 alloc_init_cont_pmd(pud, addr, next, phys, prot, 311 alloc_init_cont_pmd(pudp, addr, next, phys, prot,
305 pgtable_alloc, flags); 312 pgtable_alloc, flags);
306 313
307 BUG_ON(pud_val(old_pud) != 0 && 314 BUG_ON(pud_val(old_pud) != 0 &&
308 pud_val(old_pud) != pud_val(*pud)); 315 pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
309 } 316 }
310 phys += next - addr; 317 phys += next - addr;
311 } while (pud++, addr = next, addr != end); 318 } while (pudp++, addr = next, addr != end);
312 319
313 pud_clear_fixmap(); 320 pud_clear_fixmap();
314} 321}
@@ -320,7 +327,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
320 int flags) 327 int flags)
321{ 328{
322 unsigned long addr, length, end, next; 329 unsigned long addr, length, end, next;
323 pgd_t *pgd = pgd_offset_raw(pgdir, virt); 330 pgd_t *pgdp = pgd_offset_raw(pgdir, virt);
324 331
325 /* 332 /*
326 * If the virtual and physical address don't have the same offset 333 * If the virtual and physical address don't have the same offset
@@ -336,10 +343,10 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
336 end = addr + length; 343 end = addr + length;
337 do { 344 do {
338 next = pgd_addr_end(addr, end); 345 next = pgd_addr_end(addr, end);
339 alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc, 346 alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
340 flags); 347 flags);
341 phys += next - addr; 348 phys += next - addr;
342 } while (pgd++, addr = next, addr != end); 349 } while (pgdp++, addr = next, addr != end);
343} 350}
344 351
345static phys_addr_t pgd_pgtable_alloc(void) 352static phys_addr_t pgd_pgtable_alloc(void)
@@ -401,10 +408,10 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
401 flush_tlb_kernel_range(virt, virt + size); 408 flush_tlb_kernel_range(virt, virt + size);
402} 409}
403 410
404static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, 411static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
405 phys_addr_t end, pgprot_t prot, int flags) 412 phys_addr_t end, pgprot_t prot, int flags)
406{ 413{
407 __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start, 414 __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
408 prot, early_pgtable_alloc, flags); 415 prot, early_pgtable_alloc, flags);
409} 416}
410 417
@@ -418,7 +425,7 @@ void __init mark_linear_text_alias_ro(void)
418 PAGE_KERNEL_RO); 425 PAGE_KERNEL_RO);
419} 426}
420 427
421static void __init map_mem(pgd_t *pgd) 428static void __init map_mem(pgd_t *pgdp)
422{ 429{
423 phys_addr_t kernel_start = __pa_symbol(_text); 430 phys_addr_t kernel_start = __pa_symbol(_text);
424 phys_addr_t kernel_end = __pa_symbol(__init_begin); 431 phys_addr_t kernel_end = __pa_symbol(__init_begin);
@@ -451,7 +458,7 @@ static void __init map_mem(pgd_t *pgd)
451 if (memblock_is_nomap(reg)) 458 if (memblock_is_nomap(reg))
452 continue; 459 continue;
453 460
454 __map_memblock(pgd, start, end, PAGE_KERNEL, flags); 461 __map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
455 } 462 }
456 463
457 /* 464 /*
@@ -464,7 +471,7 @@ static void __init map_mem(pgd_t *pgd)
464 * Note that contiguous mappings cannot be remapped in this way, 471 * Note that contiguous mappings cannot be remapped in this way,
465 * so we should avoid them here. 472 * so we should avoid them here.
466 */ 473 */
467 __map_memblock(pgd, kernel_start, kernel_end, 474 __map_memblock(pgdp, kernel_start, kernel_end,
468 PAGE_KERNEL, NO_CONT_MAPPINGS); 475 PAGE_KERNEL, NO_CONT_MAPPINGS);
469 memblock_clear_nomap(kernel_start, kernel_end - kernel_start); 476 memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
470 477
@@ -475,7 +482,7 @@ static void __init map_mem(pgd_t *pgd)
475 * through /sys/kernel/kexec_crash_size interface. 482 * through /sys/kernel/kexec_crash_size interface.
476 */ 483 */
477 if (crashk_res.end) { 484 if (crashk_res.end) {
478 __map_memblock(pgd, crashk_res.start, crashk_res.end + 1, 485 __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
479 PAGE_KERNEL, 486 PAGE_KERNEL,
480 NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); 487 NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
481 memblock_clear_nomap(crashk_res.start, 488 memblock_clear_nomap(crashk_res.start,
@@ -499,7 +506,7 @@ void mark_rodata_ro(void)
499 debug_checkwx(); 506 debug_checkwx();
500} 507}
501 508
502static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, 509static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
503 pgprot_t prot, struct vm_struct *vma, 510 pgprot_t prot, struct vm_struct *vma,
504 int flags, unsigned long vm_flags) 511 int flags, unsigned long vm_flags)
505{ 512{
@@ -509,7 +516,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
509 BUG_ON(!PAGE_ALIGNED(pa_start)); 516 BUG_ON(!PAGE_ALIGNED(pa_start));
510 BUG_ON(!PAGE_ALIGNED(size)); 517 BUG_ON(!PAGE_ALIGNED(size));
511 518
512 __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, 519 __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
513 early_pgtable_alloc, flags); 520 early_pgtable_alloc, flags);
514 521
515 if (!(vm_flags & VM_NO_GUARD)) 522 if (!(vm_flags & VM_NO_GUARD))
@@ -562,7 +569,7 @@ core_initcall(map_entry_trampoline);
562/* 569/*
563 * Create fine-grained mappings for the kernel. 570 * Create fine-grained mappings for the kernel.
564 */ 571 */
565static void __init map_kernel(pgd_t *pgd) 572static void __init map_kernel(pgd_t *pgdp)
566{ 573{
567 static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext, 574 static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
568 vmlinux_initdata, vmlinux_data; 575 vmlinux_initdata, vmlinux_data;
@@ -578,24 +585,24 @@ static void __init map_kernel(pgd_t *pgd)
578 * Only rodata will be remapped with different permissions later on, 585 * Only rodata will be remapped with different permissions later on,
579 * all other segments are allowed to use contiguous mappings. 586 * all other segments are allowed to use contiguous mappings.
580 */ 587 */
581 map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0, 588 map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
582 VM_NO_GUARD); 589 VM_NO_GUARD);
583 map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL, 590 map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
584 &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD); 591 &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
585 map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot, 592 map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
586 &vmlinux_inittext, 0, VM_NO_GUARD); 593 &vmlinux_inittext, 0, VM_NO_GUARD);
587 map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL, 594 map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
588 &vmlinux_initdata, 0, VM_NO_GUARD); 595 &vmlinux_initdata, 0, VM_NO_GUARD);
589 map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); 596 map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
590 597
591 if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) { 598 if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
592 /* 599 /*
593 * The fixmap falls in a separate pgd to the kernel, and doesn't 600 * The fixmap falls in a separate pgd to the kernel, and doesn't
594 * live in the carveout for the swapper_pg_dir. We can simply 601 * live in the carveout for the swapper_pg_dir. We can simply
595 * re-use the existing dir for the fixmap. 602 * re-use the existing dir for the fixmap.
596 */ 603 */
597 set_pgd(pgd_offset_raw(pgd, FIXADDR_START), 604 set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
598 *pgd_offset_k(FIXADDR_START)); 605 READ_ONCE(*pgd_offset_k(FIXADDR_START)));
599 } else if (CONFIG_PGTABLE_LEVELS > 3) { 606 } else if (CONFIG_PGTABLE_LEVELS > 3) {
600 /* 607 /*
601 * The fixmap shares its top level pgd entry with the kernel 608 * The fixmap shares its top level pgd entry with the kernel
@@ -604,14 +611,15 @@ static void __init map_kernel(pgd_t *pgd)
604 * entry instead. 611 * entry instead.
605 */ 612 */
606 BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); 613 BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
607 pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START), 614 pud_populate(&init_mm,
615 pud_set_fixmap_offset(pgdp, FIXADDR_START),
608 lm_alias(bm_pmd)); 616 lm_alias(bm_pmd));
609 pud_clear_fixmap(); 617 pud_clear_fixmap();
610 } else { 618 } else {
611 BUG(); 619 BUG();
612 } 620 }
613 621
614 kasan_copy_shadow(pgd); 622 kasan_copy_shadow(pgdp);
615} 623}
616 624
617/* 625/*
@@ -621,10 +629,10 @@ static void __init map_kernel(pgd_t *pgd)
621void __init paging_init(void) 629void __init paging_init(void)
622{ 630{
623 phys_addr_t pgd_phys = early_pgtable_alloc(); 631 phys_addr_t pgd_phys = early_pgtable_alloc();
624 pgd_t *pgd = pgd_set_fixmap(pgd_phys); 632 pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
625 633
626 map_kernel(pgd); 634 map_kernel(pgdp);
627 map_mem(pgd); 635 map_mem(pgdp);
628 636
629 /* 637 /*
630 * We want to reuse the original swapper_pg_dir so we don't have to 638 * We want to reuse the original swapper_pg_dir so we don't have to
@@ -635,7 +643,7 @@ void __init paging_init(void)
635 * To do this we need to go via a temporary pgd. 643 * To do this we need to go via a temporary pgd.
636 */ 644 */
637 cpu_replace_ttbr1(__va(pgd_phys)); 645 cpu_replace_ttbr1(__va(pgd_phys));
638 memcpy(swapper_pg_dir, pgd, PGD_SIZE); 646 memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
639 cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); 647 cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
640 648
641 pgd_clear_fixmap(); 649 pgd_clear_fixmap();
@@ -655,37 +663,40 @@ void __init paging_init(void)
655 */ 663 */
656int kern_addr_valid(unsigned long addr) 664int kern_addr_valid(unsigned long addr)
657{ 665{
658 pgd_t *pgd; 666 pgd_t *pgdp;
659 pud_t *pud; 667 pud_t *pudp, pud;
660 pmd_t *pmd; 668 pmd_t *pmdp, pmd;
661 pte_t *pte; 669 pte_t *ptep, pte;
662 670
663 if ((((long)addr) >> VA_BITS) != -1UL) 671 if ((((long)addr) >> VA_BITS) != -1UL)
664 return 0; 672 return 0;
665 673
666 pgd = pgd_offset_k(addr); 674 pgdp = pgd_offset_k(addr);
667 if (pgd_none(*pgd)) 675 if (pgd_none(READ_ONCE(*pgdp)))
668 return 0; 676 return 0;
669 677
670 pud = pud_offset(pgd, addr); 678 pudp = pud_offset(pgdp, addr);
671 if (pud_none(*pud)) 679 pud = READ_ONCE(*pudp);
680 if (pud_none(pud))
672 return 0; 681 return 0;
673 682
674 if (pud_sect(*pud)) 683 if (pud_sect(pud))
675 return pfn_valid(pud_pfn(*pud)); 684 return pfn_valid(pud_pfn(pud));
676 685
677 pmd = pmd_offset(pud, addr); 686 pmdp = pmd_offset(pudp, addr);
678 if (pmd_none(*pmd)) 687 pmd = READ_ONCE(*pmdp);
688 if (pmd_none(pmd))
679 return 0; 689 return 0;
680 690
681 if (pmd_sect(*pmd)) 691 if (pmd_sect(pmd))
682 return pfn_valid(pmd_pfn(*pmd)); 692 return pfn_valid(pmd_pfn(pmd));
683 693
684 pte = pte_offset_kernel(pmd, addr); 694 ptep = pte_offset_kernel(pmdp, addr);
685 if (pte_none(*pte)) 695 pte = READ_ONCE(*ptep);
696 if (pte_none(pte))
686 return 0; 697 return 0;
687 698
688 return pfn_valid(pte_pfn(*pte)); 699 return pfn_valid(pte_pfn(pte));
689} 700}
690#ifdef CONFIG_SPARSEMEM_VMEMMAP 701#ifdef CONFIG_SPARSEMEM_VMEMMAP
691#if !ARM64_SWAPPER_USES_SECTION_MAPS 702#if !ARM64_SWAPPER_USES_SECTION_MAPS
@@ -700,32 +711,32 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
700{ 711{
701 unsigned long addr = start; 712 unsigned long addr = start;
702 unsigned long next; 713 unsigned long next;
703 pgd_t *pgd; 714 pgd_t *pgdp;
704 pud_t *pud; 715 pud_t *pudp;
705 pmd_t *pmd; 716 pmd_t *pmdp;
706 717
707 do { 718 do {
708 next = pmd_addr_end(addr, end); 719 next = pmd_addr_end(addr, end);
709 720
710 pgd = vmemmap_pgd_populate(addr, node); 721 pgdp = vmemmap_pgd_populate(addr, node);
711 if (!pgd) 722 if (!pgdp)
712 return -ENOMEM; 723 return -ENOMEM;
713 724
714 pud = vmemmap_pud_populate(pgd, addr, node); 725 pudp = vmemmap_pud_populate(pgdp, addr, node);
715 if (!pud) 726 if (!pudp)
716 return -ENOMEM; 727 return -ENOMEM;
717 728
718 pmd = pmd_offset(pud, addr); 729 pmdp = pmd_offset(pudp, addr);
719 if (pmd_none(*pmd)) { 730 if (pmd_none(READ_ONCE(*pmdp))) {
720 void *p = NULL; 731 void *p = NULL;
721 732
722 p = vmemmap_alloc_block_buf(PMD_SIZE, node); 733 p = vmemmap_alloc_block_buf(PMD_SIZE, node);
723 if (!p) 734 if (!p)
724 return -ENOMEM; 735 return -ENOMEM;
725 736
726 pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL)); 737 pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
727 } else 738 } else
728 vmemmap_verify((pte_t *)pmd, node, addr, next); 739 vmemmap_verify((pte_t *)pmdp, node, addr, next);
729 } while (addr = next, addr != end); 740 } while (addr = next, addr != end);
730 741
731 return 0; 742 return 0;
@@ -739,20 +750,22 @@ void vmemmap_free(unsigned long start, unsigned long end,
739 750
740static inline pud_t * fixmap_pud(unsigned long addr) 751static inline pud_t * fixmap_pud(unsigned long addr)
741{ 752{
742 pgd_t *pgd = pgd_offset_k(addr); 753 pgd_t *pgdp = pgd_offset_k(addr);
754 pgd_t pgd = READ_ONCE(*pgdp);
743 755
744 BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); 756 BUG_ON(pgd_none(pgd) || pgd_bad(pgd));
745 757
746 return pud_offset_kimg(pgd, addr); 758 return pud_offset_kimg(pgdp, addr);
747} 759}
748 760
749static inline pmd_t * fixmap_pmd(unsigned long addr) 761static inline pmd_t * fixmap_pmd(unsigned long addr)
750{ 762{
751 pud_t *pud = fixmap_pud(addr); 763 pud_t *pudp = fixmap_pud(addr);
764 pud_t pud = READ_ONCE(*pudp);
752 765
753 BUG_ON(pud_none(*pud) || pud_bad(*pud)); 766 BUG_ON(pud_none(pud) || pud_bad(pud));
754 767
755 return pmd_offset_kimg(pud, addr); 768 return pmd_offset_kimg(pudp, addr);
756} 769}
757 770
758static inline pte_t * fixmap_pte(unsigned long addr) 771static inline pte_t * fixmap_pte(unsigned long addr)
@@ -768,30 +781,31 @@ static inline pte_t * fixmap_pte(unsigned long addr)
768 */ 781 */
769void __init early_fixmap_init(void) 782void __init early_fixmap_init(void)
770{ 783{
771 pgd_t *pgd; 784 pgd_t *pgdp, pgd;
772 pud_t *pud; 785 pud_t *pudp;
773 pmd_t *pmd; 786 pmd_t *pmdp;
774 unsigned long addr = FIXADDR_START; 787 unsigned long addr = FIXADDR_START;
775 788
776 pgd = pgd_offset_k(addr); 789 pgdp = pgd_offset_k(addr);
790 pgd = READ_ONCE(*pgdp);
777 if (CONFIG_PGTABLE_LEVELS > 3 && 791 if (CONFIG_PGTABLE_LEVELS > 3 &&
778 !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) { 792 !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {
779 /* 793 /*
780 * We only end up here if the kernel mapping and the fixmap 794 * We only end up here if the kernel mapping and the fixmap
781 * share the top level pgd entry, which should only happen on 795 * share the top level pgd entry, which should only happen on
782 * 16k/4 levels configurations. 796 * 16k/4 levels configurations.
783 */ 797 */
784 BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); 798 BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
785 pud = pud_offset_kimg(pgd, addr); 799 pudp = pud_offset_kimg(pgdp, addr);
786 } else { 800 } else {
787 if (pgd_none(*pgd)) 801 if (pgd_none(pgd))
788 __pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE); 802 __pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
789 pud = fixmap_pud(addr); 803 pudp = fixmap_pud(addr);
790 } 804 }
791 if (pud_none(*pud)) 805 if (pud_none(READ_ONCE(*pudp)))
792 __pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE); 806 __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
793 pmd = fixmap_pmd(addr); 807 pmdp = fixmap_pmd(addr);
794 __pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE); 808 __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
795 809
796 /* 810 /*
797 * The boot-ioremap range spans multiple pmds, for which 811 * The boot-ioremap range spans multiple pmds, for which
@@ -800,11 +814,11 @@ void __init early_fixmap_init(void)
800 BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) 814 BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
801 != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); 815 != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
802 816
803 if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN))) 817 if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
804 || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) { 818 || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
805 WARN_ON(1); 819 WARN_ON(1);
806 pr_warn("pmd %p != %p, %p\n", 820 pr_warn("pmdp %p != %p, %p\n",
807 pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)), 821 pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
808 fixmap_pmd(fix_to_virt(FIX_BTMAP_END))); 822 fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
809 pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", 823 pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
810 fix_to_virt(FIX_BTMAP_BEGIN)); 824 fix_to_virt(FIX_BTMAP_BEGIN));
@@ -824,16 +838,16 @@ void __set_fixmap(enum fixed_addresses idx,
824 phys_addr_t phys, pgprot_t flags) 838 phys_addr_t phys, pgprot_t flags)
825{ 839{
826 unsigned long addr = __fix_to_virt(idx); 840 unsigned long addr = __fix_to_virt(idx);
827 pte_t *pte; 841 pte_t *ptep;
828 842
829 BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); 843 BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
830 844
831 pte = fixmap_pte(addr); 845 ptep = fixmap_pte(addr);
832 846
833 if (pgprot_val(flags)) { 847 if (pgprot_val(flags)) {
834 set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); 848 set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
835 } else { 849 } else {
836 pte_clear(&init_mm, addr, pte); 850 pte_clear(&init_mm, addr, ptep);
837 flush_tlb_kernel_range(addr, addr+PAGE_SIZE); 851 flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
838 } 852 }
839} 853}
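For context, __set_fixmap() is the primitive behind set_fixmap()/clear_fixmap(): a non-empty pgprot installs a mapping at the slot's fixed virtual address, while an empty pgprot takes the pte_clear() + TLB-flush path above. A hedged usage sketch; FIX_EXAMPLE is a made-up enum fixed_addresses slot, not one added by this patch:

void __iomem *map_probe_window(phys_addr_t phys)
{
        __set_fixmap(FIX_EXAMPLE, phys & PAGE_MASK, FIXMAP_PAGE_IO);
        return (void __iomem *)(__fix_to_virt(FIX_EXAMPLE) +
                                (phys & ~PAGE_MASK));
}

void unmap_probe_window(void)
{
        /* Empty prot clears the PTE and flushes the TLB entry. */
        __set_fixmap(FIX_EXAMPLE, 0, __pgprot(0));
}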
@@ -915,36 +929,36 @@ int __init arch_ioremap_pmd_supported(void)
915 return 1; 929 return 1;
916} 930}
917 931
918int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) 932int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
919{ 933{
920 pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | 934 pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
921 pgprot_val(mk_sect_prot(prot))); 935 pgprot_val(mk_sect_prot(prot)));
922 BUG_ON(phys & ~PUD_MASK); 936 BUG_ON(phys & ~PUD_MASK);
923 set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot)); 937 set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
924 return 1; 938 return 1;
925} 939}
926 940
927int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) 941int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
928{ 942{
929 pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | 943 pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
930 pgprot_val(mk_sect_prot(prot))); 944 pgprot_val(mk_sect_prot(prot)));
931 BUG_ON(phys & ~PMD_MASK); 945 BUG_ON(phys & ~PMD_MASK);
932 set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot)); 946 set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
933 return 1; 947 return 1;
934} 948}
935 949
936int pud_clear_huge(pud_t *pud) 950int pud_clear_huge(pud_t *pudp)
937{ 951{
938 if (!pud_sect(*pud)) 952 if (!pud_sect(READ_ONCE(*pudp)))
939 return 0; 953 return 0;
940 pud_clear(pud); 954 pud_clear(pudp);
941 return 1; 955 return 1;
942} 956}
943 957
944int pmd_clear_huge(pmd_t *pmd) 958int pmd_clear_huge(pmd_t *pmdp)
945{ 959{
946 if (!pmd_sect(*pmd)) 960 if (!pmd_sect(READ_ONCE(*pmdp)))
947 return 0; 961 return 0;
948 pmd_clear(pmd); 962 pmd_clear(pmdp);
949 return 1; 963 return 1;
950} 964}
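pud_set_huge()/pmd_set_huge() are the arch hooks the generic ioremap_page_range() path uses to install block mappings where it can. A sketch of the caller-side precondition, under the usual alignment rules (map_region() is illustrative, not the kernel's implementation):

static int map_region(pmd_t *pmdp, unsigned long vaddr,
                      phys_addr_t phys, size_t size, pgprot_t prot)
{
        /* A PMD block is only legal when everything is PMD-aligned. */
        if (IS_ALIGNED(vaddr, PMD_SIZE) && IS_ALIGNED(phys, PMD_SIZE) &&
            size >= PMD_SIZE && arch_ioremap_pmd_supported())
                return pmd_set_huge(pmdp, phys, prot);

        return 0;       /* caller falls back to page-granular PTEs */
}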
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index a682a0a2a0fa..a56359373d8b 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -29,7 +29,7 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
29 void *data) 29 void *data)
30{ 30{
31 struct page_change_data *cdata = data; 31 struct page_change_data *cdata = data;
32 pte_t pte = *ptep; 32 pte_t pte = READ_ONCE(*ptep);
33 33
34 pte = clear_pte_bit(pte, cdata->clear_mask); 34 pte = clear_pte_bit(pte, cdata->clear_mask);
35 pte = set_pte_bit(pte, cdata->set_mask); 35 pte = set_pte_bit(pte, cdata->set_mask);
@@ -156,30 +156,32 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
156 */ 156 */
157bool kernel_page_present(struct page *page) 157bool kernel_page_present(struct page *page)
158{ 158{
159 pgd_t *pgd; 159 pgd_t *pgdp;
160 pud_t *pud; 160 pud_t *pudp, pud;
161 pmd_t *pmd; 161 pmd_t *pmdp, pmd;
162 pte_t *pte; 162 pte_t *ptep;
163 unsigned long addr = (unsigned long)page_address(page); 163 unsigned long addr = (unsigned long)page_address(page);
164 164
165 pgd = pgd_offset_k(addr); 165 pgdp = pgd_offset_k(addr);
166 if (pgd_none(*pgd)) 166 if (pgd_none(READ_ONCE(*pgdp)))
167 return false; 167 return false;
168 168
169 pud = pud_offset(pgd, addr); 169 pudp = pud_offset(pgdp, addr);
170 if (pud_none(*pud)) 170 pud = READ_ONCE(*pudp);
171 if (pud_none(pud))
171 return false; 172 return false;
172 if (pud_sect(*pud)) 173 if (pud_sect(pud))
173 return true; 174 return true;
174 175
175 pmd = pmd_offset(pud, addr); 176 pmdp = pmd_offset(pudp, addr);
176 if (pmd_none(*pmd)) 177 pmd = READ_ONCE(*pmdp);
178 if (pmd_none(pmd))
177 return false; 179 return false;
178 if (pmd_sect(*pmd)) 180 if (pmd_sect(pmd))
179 return true; 181 return true;
180 182
181 pte = pte_offset_kernel(pmd, addr); 183 ptep = pte_offset_kernel(pmdp, addr);
182 return pte_valid(*pte); 184 return pte_valid(READ_ONCE(*ptep));
183} 185}
184#endif /* CONFIG_HIBERNATION */ 186#endif /* CONFIG_HIBERNATION */
185#endif /* CONFIG_DEBUG_PAGEALLOC */ 187#endif /* CONFIG_DEBUG_PAGEALLOC */
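change_page_range() above is an apply_to_page_range() callback; the set/clear masks arrive through the opaque data pointer. A hedged sketch of a driver for it (make_range_ro() is hypothetical; the kernel's own entry point, change_memory_common(), is outside this hunk):

static int make_range_ro(unsigned long start, unsigned long size)
{
        struct page_change_data data = {
                .set_mask       = __pgprot(PTE_RDONLY),
                .clear_mask     = __pgprot(PTE_WRITE),
        };

        /* Calls change_page_range() once for every PTE in the range. */
        return apply_to_page_range(&init_mm, start, size,
                                   change_page_range, &data);
}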
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 71baed7e592a..c0af47617299 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -205,7 +205,8 @@ ENDPROC(idmap_cpu_replace_ttbr1)
205 dc cvac, cur_\()\type\()p // Ensure any existing dirty 205 dc cvac, cur_\()\type\()p // Ensure any existing dirty
206 dmb sy // lines are written back before 206 dmb sy // lines are written back before
207 ldr \type, [cur_\()\type\()p] // loading the entry 207 ldr \type, [cur_\()\type\()p] // loading the entry
208 tbz \type, #0, next_\()\type // Skip invalid entries 208 tbz \type, #0, skip_\()\type // Skip invalid and
209 tbnz \type, #11, skip_\()\type // non-global entries
209 .endm 210 .endm
210 211
211 .macro __idmap_kpti_put_pgtable_ent_ng, type 212 .macro __idmap_kpti_put_pgtable_ent_ng, type
@@ -265,8 +266,9 @@ ENTRY(idmap_kpti_install_ng_mappings)
265 add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8) 266 add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
266do_pgd: __idmap_kpti_get_pgtable_ent pgd 267do_pgd: __idmap_kpti_get_pgtable_ent pgd
267 tbnz pgd, #1, walk_puds 268 tbnz pgd, #1, walk_puds
268 __idmap_kpti_put_pgtable_ent_ng pgd
269next_pgd: 269next_pgd:
270 __idmap_kpti_put_pgtable_ent_ng pgd
271skip_pgd:
270 add cur_pgdp, cur_pgdp, #8 272 add cur_pgdp, cur_pgdp, #8
271 cmp cur_pgdp, end_pgdp 273 cmp cur_pgdp, end_pgdp
272 b.ne do_pgd 274 b.ne do_pgd
@@ -294,8 +296,9 @@ walk_puds:
294 add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8) 296 add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
295do_pud: __idmap_kpti_get_pgtable_ent pud 297do_pud: __idmap_kpti_get_pgtable_ent pud
296 tbnz pud, #1, walk_pmds 298 tbnz pud, #1, walk_pmds
297 __idmap_kpti_put_pgtable_ent_ng pud
298next_pud: 299next_pud:
300 __idmap_kpti_put_pgtable_ent_ng pud
301skip_pud:
299 add cur_pudp, cur_pudp, 8 302 add cur_pudp, cur_pudp, 8
300 cmp cur_pudp, end_pudp 303 cmp cur_pudp, end_pudp
301 b.ne do_pud 304 b.ne do_pud
@@ -314,8 +317,9 @@ walk_pmds:
314 add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8) 317 add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
315do_pmd: __idmap_kpti_get_pgtable_ent pmd 318do_pmd: __idmap_kpti_get_pgtable_ent pmd
316 tbnz pmd, #1, walk_ptes 319 tbnz pmd, #1, walk_ptes
317 __idmap_kpti_put_pgtable_ent_ng pmd
318next_pmd: 320next_pmd:
321 __idmap_kpti_put_pgtable_ent_ng pmd
322skip_pmd:
319 add cur_pmdp, cur_pmdp, #8 323 add cur_pmdp, cur_pmdp, #8
320 cmp cur_pmdp, end_pmdp 324 cmp cur_pmdp, end_pmdp
321 b.ne do_pmd 325 b.ne do_pmd
@@ -333,7 +337,7 @@ walk_ptes:
333 add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8) 337 add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
334do_pte: __idmap_kpti_get_pgtable_ent pte 338do_pte: __idmap_kpti_get_pgtable_ent pte
335 __idmap_kpti_put_pgtable_ent_ng pte 339 __idmap_kpti_put_pgtable_ent_ng pte
336next_pte: 340skip_pte:
337 add cur_ptep, cur_ptep, #8 341 add cur_ptep, cur_ptep, #8
338 cmp cur_ptep, end_ptep 342 cmp cur_ptep, end_ptep
339 b.ne do_pte 343 b.ne do_pte
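The relabelling changes where an invalid or already-non-global entry lands: it now branches to skip_<type>, past the store, whereas next_<type> falls through the store that sets the nG bit. An illustrative C rendering of one level of the walk (simplified; it ignores that the assembly also rewrites table descriptors on the way back up):

for (pmdp = table; pmdp < table + PTRS_PER_PMD; pmdp++) {
        pmd_t pmd = READ_ONCE(*pmdp);

        /* skip_pmd: invalid (bit 0 clear) or already nG (bit 11 set) */
        if (!(pmd_val(pmd) & 1) || (pmd_val(pmd) & PTE_NG))
                continue;

        if (pmd_val(pmd) & 2) {         /* table descriptor */
                walk_ptes(pmd);
                continue;
        }

        /* next_pmd: block entry, write it back with nG set */
        WRITE_ONCE(*pmdp, __pmd(pmd_val(pmd) | PTE_NG));
}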
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 0b4c65a1af25..498f3da3f225 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -41,7 +41,6 @@ ifneq ($(CONFIG_IA64_ESI),)
41obj-y += esi_stub.o # must be in kernel proper 41obj-y += esi_stub.o # must be in kernel proper
42endif 42endif
43obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o 43obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o
44obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
45 44
46obj-$(CONFIG_BINFMT_ELF) += elfcore.o 45obj-$(CONFIG_BINFMT_ELF) += elfcore.o
47 46
diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c
index 19c88d770054..fcf9af492d60 100644
--- a/arch/mips/kernel/mips-cpc.c
+++ b/arch/mips/kernel/mips-cpc.c
@@ -10,6 +10,8 @@
10 10
11#include <linux/errno.h> 11#include <linux/errno.h>
12#include <linux/percpu.h> 12#include <linux/percpu.h>
13#include <linux/of.h>
14#include <linux/of_address.h>
13#include <linux/spinlock.h> 15#include <linux/spinlock.h>
14 16
15#include <asm/mips-cps.h> 17#include <asm/mips-cps.h>
@@ -22,6 +24,17 @@ static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags);
22 24
23phys_addr_t __weak mips_cpc_default_phys_base(void) 25phys_addr_t __weak mips_cpc_default_phys_base(void)
24{ 26{
27 struct device_node *cpc_node;
28 struct resource res;
29 int err;
30
31 cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc");
32 if (cpc_node) {
33 err = of_address_to_resource(cpc_node, 0, &res);
34 if (!err)
35 return res.start;
36 }
37
25 return 0; 38 return 0;
26} 39}
27 40
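mips_cpc_default_phys_base() remains __weak, so the new DT lookup is only a default; a platform can still supply its own base. A hypothetical override (the address is made up):

/* In platform code; takes precedence over the __weak default above. */
phys_addr_t mips_cpc_default_phys_base(void)
{
        return 0x1bde0000;      /* hypothetical SoC CPC base */
}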
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 85bc601e9a0d..5f8b0a9e30b3 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -375,6 +375,7 @@ static void __init bootmem_init(void)
375 unsigned long reserved_end; 375 unsigned long reserved_end;
376 unsigned long mapstart = ~0UL; 376 unsigned long mapstart = ~0UL;
377 unsigned long bootmap_size; 377 unsigned long bootmap_size;
378 phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX;
378 bool bootmap_valid = false; 379 bool bootmap_valid = false;
379 int i; 380 int i;
380 381
@@ -395,7 +396,8 @@ static void __init bootmem_init(void)
395 max_low_pfn = 0; 396 max_low_pfn = 0;
396 397
397 /* 398 /*
398 * Find the highest page frame number we have available. 399 * Find the highest page frame number we have available
400 * and the lowest used RAM address
399 */ 401 */
400 for (i = 0; i < boot_mem_map.nr_map; i++) { 402 for (i = 0; i < boot_mem_map.nr_map; i++) {
401 unsigned long start, end; 403 unsigned long start, end;
@@ -407,6 +409,8 @@ static void __init bootmem_init(void)
407 end = PFN_DOWN(boot_mem_map.map[i].addr 409 end = PFN_DOWN(boot_mem_map.map[i].addr
408 + boot_mem_map.map[i].size); 410 + boot_mem_map.map[i].size);
409 411
412 ramstart = min(ramstart, boot_mem_map.map[i].addr);
413
410#ifndef CONFIG_HIGHMEM 414#ifndef CONFIG_HIGHMEM
411 /* 415 /*
412 * Skip highmem here so we get an accurate max_low_pfn if low 416 * Skip highmem here so we get an accurate max_low_pfn if low
@@ -436,6 +440,13 @@ static void __init bootmem_init(void)
436 mapstart = max(reserved_end, start); 440 mapstart = max(reserved_end, start);
437 } 441 }
438 442
443 /*
444 * Reserve any memory between the start of RAM and PHYS_OFFSET
445 */
446 if (ramstart > PHYS_OFFSET)
447 add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
448 BOOT_MEM_RESERVED);
449
439 if (min_low_pfn >= max_low_pfn) 450 if (min_low_pfn >= max_low_pfn)
440 panic("Incorrect memory mapping !!!"); 451 panic("Incorrect memory mapping !!!");
441 if (min_low_pfn > ARCH_PFN_OFFSET) { 452 if (min_low_pfn > ARCH_PFN_OFFSET) {
@@ -664,9 +675,6 @@ static int __init early_parse_mem(char *p)
664 675
665 add_memory_region(start, size, BOOT_MEM_RAM); 676 add_memory_region(start, size, BOOT_MEM_RAM);
666 677
667 if (start && start > PHYS_OFFSET)
668 add_memory_region(PHYS_OFFSET, start - PHYS_OFFSET,
669 BOOT_MEM_RESERVED);
670 return 0; 678 return 0;
671} 679}
672early_param("mem", early_parse_mem); 680early_param("mem", early_parse_mem);
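The reservation is now derived from the lowest address present in the bootmem map rather than from each mem= argument, so it also covers maps built without mem=. A worked instance with hypothetical numbers:

/* One region at 0x80000000..0x90000000 and PHYS_OFFSET == 0:
 * ramstart becomes 0x80000000 and bootmem_init() reserves the
 * hole below RAM exactly once, equivalent to: */
add_memory_region(PHYS_OFFSET, 0x80000000 - PHYS_OFFSET,
                  BOOT_MEM_RESERVED);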
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 87dcac2447c8..9d41732a9146 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -572,7 +572,7 @@ asmlinkage void __weak plat_wired_tlb_setup(void)
572 */ 572 */
573} 573}
574 574
575void __init bmips_cpu_setup(void) 575void bmips_cpu_setup(void)
576{ 576{
577 void __iomem __maybe_unused *cbr = BMIPS_GET_CBR(); 577 void __iomem __maybe_unused *cbr = BMIPS_GET_CBR();
578 u32 __maybe_unused cfg; 578 u32 __maybe_unused cfg;
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 30a155c0a6b0..c615abdce119 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -16,6 +16,7 @@
16#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) 16#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
17 17
18#define PMD_CACHE_INDEX PMD_INDEX_SIZE 18#define PMD_CACHE_INDEX PMD_INDEX_SIZE
19#define PUD_CACHE_INDEX PUD_INDEX_SIZE
19 20
20#ifndef __ASSEMBLY__ 21#ifndef __ASSEMBLY__
21#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) 22#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 949d691094a4..67c5475311ee 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
63 * keeping the prototype consistent across the two formats. 63 * keeping the prototype consistent across the two formats.
64 */ 64 */
65static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, 65static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
66 unsigned int subpg_index, unsigned long hidx) 66 unsigned int subpg_index, unsigned long hidx,
67 int offset)
67{ 68{
68 return (hidx << H_PAGE_F_GIX_SHIFT) & 69 return (hidx << H_PAGE_F_GIX_SHIFT) &
69 (H_PAGE_F_SECOND | H_PAGE_F_GIX); 70 (H_PAGE_F_SECOND | H_PAGE_F_GIX);
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 338b7da468ce..3bcf269f8f55 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -45,7 +45,7 @@
45 * generic accessors and iterators here 45 * generic accessors and iterators here
46 */ 46 */
47#define __real_pte __real_pte 47#define __real_pte __real_pte
48static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) 48static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
49{ 49{
50 real_pte_t rpte; 50 real_pte_t rpte;
51 unsigned long *hidxp; 51 unsigned long *hidxp;
@@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
59 */ 59 */
60 smp_rmb(); 60 smp_rmb();
61 61
62 hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); 62 hidxp = (unsigned long *)(ptep + offset);
63 rpte.hidx = *hidxp; 63 rpte.hidx = *hidxp;
64 return rpte; 64 return rpte;
65} 65}
@@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
86 * expected to modify the PTE bits accordingly and commit the PTE to memory. 86 * expected to modify the PTE bits accordingly and commit the PTE to memory.
87 */ 87 */
88static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, 88static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
89 unsigned int subpg_index, unsigned long hidx) 89 unsigned int subpg_index,
90 unsigned long hidx, int offset)
90{ 91{
91 unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); 92 unsigned long *hidxp = (unsigned long *)(ptep + offset);
92 93
93 rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index); 94 rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
94 *hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index); 95 *hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
@@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a
140} 141}
141 142
142#define H_PTE_TABLE_SIZE PTE_FRAG_SIZE 143#define H_PTE_TABLE_SIZE PTE_FRAG_SIZE
143#ifdef CONFIG_TRANSPARENT_HUGEPAGE 144#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
144#define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \ 145#define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
145 (sizeof(unsigned long) << PMD_INDEX_SIZE)) 146 (sizeof(unsigned long) << PMD_INDEX_SIZE))
146#else 147#else
147#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) 148#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
148#endif 149#endif
150#ifdef CONFIG_HUGETLB_PAGE
151#define H_PUD_TABLE_SIZE ((sizeof(pud_t) << PUD_INDEX_SIZE) + \
152 (sizeof(unsigned long) << PUD_INDEX_SIZE))
153#else
149#define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) 154#define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
155#endif
150#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) 156#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
151 157
152#ifdef CONFIG_TRANSPARENT_HUGEPAGE 158#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 0920eff731b3..935adcd92a81 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -23,7 +23,8 @@
23 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) 23 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
24#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) 24#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
25 25
26#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES) 26#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
27 defined(CONFIG_PPC_64K_PAGES)
27/* 28/*
28 * only with hash 64k we need to use the second half of pmd page table 29 * only with hash 64k we need to use the second half of pmd page table
29 * to store pointer to deposited pgtable_t 30 * to store pointer to deposited pgtable_t
@@ -33,6 +34,16 @@
33#define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE 34#define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE
34#endif 35#endif
35/* 36/*
 37 * We store the slot details in the second half of the page table.
 38 * Enlarge the PUD-level table so that hugetlb PTEs can be stored
 39 * at PUD level.
40 */
41#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES)
42#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE + 1)
43#else
44#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE)
45#endif
46/*
36 * Define the address range of the kernel non-linear virtual area 47 * Define the address range of the kernel non-linear virtual area
37 */ 48 */
38#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) 49#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 1fcfa425cefa..4746bc68d446 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
73 73
74static inline pgd_t *pgd_alloc(struct mm_struct *mm) 74static inline pgd_t *pgd_alloc(struct mm_struct *mm)
75{ 75{
76 pgd_t *pgd;
77
76 if (radix_enabled()) 78 if (radix_enabled())
77 return radix__pgd_alloc(mm); 79 return radix__pgd_alloc(mm);
78 return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), 80
79 pgtable_gfp_flags(mm, GFP_KERNEL)); 81 pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
82 pgtable_gfp_flags(mm, GFP_KERNEL));
83 memset(pgd, 0, PGD_TABLE_SIZE);
84
85 return pgd;
80} 86}
81 87
82static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) 88static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
93 99
94static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) 100static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
95{ 101{
96 return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), 102 return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
97 pgtable_gfp_flags(mm, GFP_KERNEL)); 103 pgtable_gfp_flags(mm, GFP_KERNEL));
98} 104}
99 105
100static inline void pud_free(struct mm_struct *mm, pud_t *pud) 106static inline void pud_free(struct mm_struct *mm, pud_t *pud)
101{ 107{
102 kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); 108 kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
103} 109}
104 110
105static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) 111static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
115 * ahead and flush the page walk cache 121 * ahead and flush the page walk cache
116 */ 122 */
117 flush_tlb_pgtable(tlb, address); 123 flush_tlb_pgtable(tlb, address);
118 pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE); 124 pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
119} 125}
120 126
121static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) 127static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 51017726d495..a6b9f1d74600 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size;
232extern unsigned long __pud_index_size; 232extern unsigned long __pud_index_size;
233extern unsigned long __pgd_index_size; 233extern unsigned long __pgd_index_size;
234extern unsigned long __pmd_cache_index; 234extern unsigned long __pmd_cache_index;
235extern unsigned long __pud_cache_index;
235#define PTE_INDEX_SIZE __pte_index_size 236#define PTE_INDEX_SIZE __pte_index_size
236#define PMD_INDEX_SIZE __pmd_index_size 237#define PMD_INDEX_SIZE __pmd_index_size
237#define PUD_INDEX_SIZE __pud_index_size 238#define PUD_INDEX_SIZE __pud_index_size
238#define PGD_INDEX_SIZE __pgd_index_size 239#define PGD_INDEX_SIZE __pgd_index_size
239#define PMD_CACHE_INDEX __pmd_cache_index 240#define PMD_CACHE_INDEX __pmd_cache_index
241#define PUD_CACHE_INDEX __pud_cache_index
240/* 242/*
241 * Because of use of pte fragments and THP, size of page table 243 * Because of use of pte fragments and THP, size of page table
242 * are not always derived out of index size above. 244 * are not always derived out of index size above.
@@ -348,7 +350,7 @@ extern unsigned long pci_io_base;
348 */ 350 */
349#ifndef __real_pte 351#ifndef __real_pte
350 352
351#define __real_pte(e,p) ((real_pte_t){(e)}) 353#define __real_pte(e, p, o) ((real_pte_t){(e)})
352#define __rpte_to_pte(r) ((r).pte) 354#define __rpte_to_pte(r) ((r).pte)
353#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) 355#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
354 356
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 176dfb73d42c..471b2274fbeb 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
645 EXC_HV, SOFTEN_TEST_HV, bitmask) 645 EXC_HV, SOFTEN_TEST_HV, bitmask)
646 646
647#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \ 647#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \
648 MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\ 648 MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
649 EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) 649 EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
650 650
651/* 651/*
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 88e5e8f17e98..855e17d158b1 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -30,6 +30,16 @@
30#define PACA_IRQ_PMI 0x40 30#define PACA_IRQ_PMI 0x40
31 31
32/* 32/*
33 * Some soft-masked interrupts must be hard masked until they are replayed
34 * (e.g., because the soft-masked handler does not clear the exception).
35 */
36#ifdef CONFIG_PPC_BOOK3S
37#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI)
38#else
39#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE)
40#endif
41
42/*
33 * flags for paca->irq_soft_mask 43 * flags for paca->irq_soft_mask
34 */ 44 */
35#define IRQS_ENABLED 0 45#define IRQS_ENABLED 0
@@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void)
244static inline void may_hard_irq_enable(void) 254static inline void may_hard_irq_enable(void)
245{ 255{
246 get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; 256 get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
247 if (!(get_paca()->irq_happened & PACA_IRQ_EE)) 257 if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK))
248 __hard_irq_enable(); 258 __hard_irq_enable();
249} 259}
250 260
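With the new mask, may_hard_irq_enable() keeps MSR[EE] clear whenever any source that must stay hard masked is pending; DBELL and HMI no longer block re-enabling. A condensed restatement (illustrative only):

/* book3s: PACA_IRQ_MUST_HARD_MASK == PACA_IRQ_EE | PACA_IRQ_PMI */
static inline bool can_hard_enable(u8 irq_happened)
{
        /* EE/PMI are not cleared by their soft-masked handlers and
         * would refire immediately if interrupts were hard-enabled. */
        return !(irq_happened & PACA_IRQ_MUST_HARD_MASK);
}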
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 9dcbfa6bbb91..d8b1e8e7e035 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void)
140 return false; 140 return false;
141} 141}
142 142
143static inline void crash_ipi_callback(struct pt_regs *regs) { }
144
145static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
146{
147}
148
143#endif /* CONFIG_KEXEC_CORE */ 149#endif /* CONFIG_KEXEC_CORE */
144#endif /* ! __ASSEMBLY__ */ 150#endif /* ! __ASSEMBLY__ */
145#endif /* __KERNEL__ */ 151#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 504a3c36ce5c..03bbd1149530 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -24,6 +24,7 @@ extern int icache_44x_need_flush;
24#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) 24#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
25 25
26#define PMD_CACHE_INDEX PMD_INDEX_SIZE 26#define PMD_CACHE_INDEX PMD_INDEX_SIZE
27#define PUD_CACHE_INDEX PUD_INDEX_SIZE
27 28
28#ifndef __ASSEMBLY__ 29#ifndef __ASSEMBLY__
29#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) 30#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index abddf5830ad5..5c5f75d005ad 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -27,6 +27,7 @@
27#else 27#else
28#define PMD_CACHE_INDEX PMD_INDEX_SIZE 28#define PMD_CACHE_INDEX PMD_INDEX_SIZE
29#endif 29#endif
30#define PUD_CACHE_INDEX PUD_INDEX_SIZE
30 31
31/* 32/*
32 * Define the address range of the kernel non-linear virtual area 33 * Define the address range of the kernel non-linear virtual area
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 88187c285c70..9f421641a35c 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid);
44extern void sysfs_remove_device_from_node(struct device *dev, int nid); 44extern void sysfs_remove_device_from_node(struct device *dev, int nid);
45extern int numa_update_cpu_topology(bool cpus_locked); 45extern int numa_update_cpu_topology(bool cpus_locked);
46 46
47static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
48{
49 numa_cpu_lookup_table[cpu] = node;
50}
51
47static inline int early_cpu_to_node(int cpu) 52static inline int early_cpu_to_node(int cpu)
48{ 53{
49 int nid; 54 int nid;
@@ -76,12 +81,16 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
76{ 81{
77 return 0; 82 return 0;
78} 83}
84
85static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
86
79#endif /* CONFIG_NUMA */ 87#endif /* CONFIG_NUMA */
80 88
81#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) 89#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
82extern int start_topology_update(void); 90extern int start_topology_update(void);
83extern int stop_topology_update(void); 91extern int stop_topology_update(void);
84extern int prrn_is_enabled(void); 92extern int prrn_is_enabled(void);
93extern int find_and_online_cpu_nid(int cpu);
85#else 94#else
86static inline int start_topology_update(void) 95static inline int start_topology_update(void)
87{ 96{
@@ -95,6 +104,10 @@ static inline int prrn_is_enabled(void)
95{ 104{
96 return 0; 105 return 0;
97} 106}
107static inline int find_and_online_cpu_nid(int cpu)
108{
109 return 0;
110}
98#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ 111#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
99 112
100#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES) 113#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index ee832d344a5a..9b6e653e501a 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -943,6 +943,8 @@ kernel_dbg_exc:
943/* 943/*
944 * An interrupt came in while soft-disabled; We mark paca->irq_happened 944 * An interrupt came in while soft-disabled; We mark paca->irq_happened
945 * accordingly and if the interrupt is level sensitive, we hard disable 945 * accordingly and if the interrupt is level sensitive, we hard disable
 946 * it. The hard disable case (full_mask) corresponds to
 947 * PACA_IRQ_MUST_HARD_MASK, so keep these in sync.
946 */ 948 */
947 949
948.macro masked_interrupt_book3e paca_irq full_mask 950.macro masked_interrupt_book3e paca_irq full_mask
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 243d072a225a..3ac87e53b3da 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
1426 * triggered and won't automatically refire. 1426 * triggered and won't automatically refire.
1427 * - If it was a HMI we return immediately since we handled it in realmode 1427 * - If it was a HMI we return immediately since we handled it in realmode
1428 * and it won't refire. 1428 * and it won't refire.
1429 * - else we hard disable and return. 1429 * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
1430 * This is called with r10 containing the value to OR to the paca field. 1430 * This is called with r10 containing the value to OR to the paca field.
1431 */ 1431 */
1432#define MASKED_INTERRUPT(_H) \ 1432#define MASKED_INTERRUPT(_H) \
@@ -1441,8 +1441,8 @@ masked_##_H##interrupt: \
1441 ori r10,r10,0xffff; \ 1441 ori r10,r10,0xffff; \
1442 mtspr SPRN_DEC,r10; \ 1442 mtspr SPRN_DEC,r10; \
1443 b MASKED_DEC_HANDLER_LABEL; \ 1443 b MASKED_DEC_HANDLER_LABEL; \
14441: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \ 14441: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK; \
1445 bne 2f; \ 1445 beq 2f; \
1446 mfspr r10,SPRN_##_H##SRR1; \ 1446 mfspr r10,SPRN_##_H##SRR1; \
1447 xori r10,r10,MSR_EE; /* clear MSR_EE */ \ 1447 xori r10,r10,MSR_EE; /* clear MSR_EE */ \
1448 mtspr SPRN_##_H##SRR1,r10; \ 1448 mtspr SPRN_##_H##SRR1,r10; \
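Note that the branch sense flips along with the mask: the old code tested (DBELL|HMI) and skipped the hard disable when one of those was pending, while the new code tests PACA_IRQ_MUST_HARD_MASK and hard disables exactly when one is pending. In C terms (illustrative):

if (r10 & PACA_IRQ_MUST_HARD_MASK) {
        /* EE/PMI would refire: clear MSR_EE in the saved SRR1 so
         * interrupts stay hard disabled until the replay. */
        srr1 &= ~MSR_EE;
}
/* else: DBELL/HMI are one-shot; just record them and return. */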
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 5a8bfee6e187..04d0bbd7a1dd 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -788,7 +788,8 @@ static int register_cpu_online(unsigned int cpu)
788 if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) 788 if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
789 device_create_file(s, &dev_attr_pir); 789 device_create_file(s, &dev_attr_pir);
790 790
791 if (cpu_has_feature(CPU_FTR_ARCH_206)) 791 if (cpu_has_feature(CPU_FTR_ARCH_206) &&
792 !firmware_has_feature(FW_FEATURE_LPAR))
792 device_create_file(s, &dev_attr_tscr); 793 device_create_file(s, &dev_attr_tscr);
793#endif /* CONFIG_PPC64 */ 794#endif /* CONFIG_PPC64 */
794 795
@@ -873,7 +874,8 @@ static int unregister_cpu_online(unsigned int cpu)
873 if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) 874 if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
874 device_remove_file(s, &dev_attr_pir); 875 device_remove_file(s, &dev_attr_pir);
875 876
876 if (cpu_has_feature(CPU_FTR_ARCH_206)) 877 if (cpu_has_feature(CPU_FTR_ARCH_206) &&
878 !firmware_has_feature(FW_FEATURE_LPAR))
877 device_remove_file(s, &dev_attr_tscr); 879 device_remove_file(s, &dev_attr_tscr);
878#endif /* CONFIG_PPC64 */ 880#endif /* CONFIG_PPC64 */
879 881
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 1604110c4238..916844f99c64 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
216 u32 i, n_lmbs; 216 u32 i, n_lmbs;
217 217
218 n_lmbs = of_read_number(prop++, 1); 218 n_lmbs = of_read_number(prop++, 1);
219 if (n_lmbs == 0)
220 return;
219 221
220 for (i = 0; i < n_lmbs; i++) { 222 for (i = 0; i < n_lmbs; i++) {
221 read_drconf_v1_cell(&lmb, &prop); 223 read_drconf_v1_cell(&lmb, &prop);
@@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
245 u32 i, j, lmb_sets; 247 u32 i, j, lmb_sets;
246 248
247 lmb_sets = of_read_number(prop++, 1); 249 lmb_sets = of_read_number(prop++, 1);
250 if (lmb_sets == 0)
251 return;
248 252
249 for (i = 0; i < lmb_sets; i++) { 253 for (i = 0; i < lmb_sets; i++) {
250 read_drconf_v2_cell(&dr_cell, &prop); 254 read_drconf_v2_cell(&dr_cell, &prop);
@@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
354 struct drmem_lmb *lmb; 358 struct drmem_lmb *lmb;
355 359
356 drmem_info->n_lmbs = of_read_number(prop++, 1); 360 drmem_info->n_lmbs = of_read_number(prop++, 1);
361 if (drmem_info->n_lmbs == 0)
362 return;
357 363
358 drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb), 364 drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
359 GFP_KERNEL); 365 GFP_KERNEL);
@@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
373 int lmb_index; 379 int lmb_index;
374 380
375 lmb_sets = of_read_number(prop++, 1); 381 lmb_sets = of_read_number(prop++, 1);
382 if (lmb_sets == 0)
383 return;
376 384
377 /* first pass, calculate the number of LMBs */ 385 /* first pass, calculate the number of LMBs */
378 p = prop; 386 p = prop;
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 5a69b51d08a3..d573d7d07f25 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
55 * need to add in 0x1 if it's a read-only user page 55 * need to add in 0x1 if it's a read-only user page
56 */ 56 */
57 rflags = htab_convert_pte_flags(new_pte); 57 rflags = htab_convert_pte_flags(new_pte);
58 rpte = __real_pte(__pte(old_pte), ptep); 58 rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
59 59
60 if (cpu_has_feature(CPU_FTR_NOEXECUTE) && 60 if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
61 !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) 61 !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -117,7 +117,7 @@ repeat:
117 return -1; 117 return -1;
118 } 118 }
119 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; 119 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
120 new_pte |= pte_set_hidx(ptep, rpte, 0, slot); 120 new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
121 } 121 }
122 *ptep = __pte(new_pte & ~H_PAGE_BUSY); 122 *ptep = __pte(new_pte & ~H_PAGE_BUSY);
123 return 0; 123 return 0;
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 2253bbc6a599..e601d95c3b20 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
86 86
87 subpg_index = (ea & (PAGE_SIZE - 1)) >> shift; 87 subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
88 vpn = hpt_vpn(ea, vsid, ssize); 88 vpn = hpt_vpn(ea, vsid, ssize);
89 rpte = __real_pte(__pte(old_pte), ptep); 89 rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
90 /* 90 /*
 91 * None of the sub-4K pages is hashed 91 * None of the sub-4K pages is hashed
92 */ 92 */
@@ -214,7 +214,7 @@ repeat:
214 return -1; 214 return -1;
215 } 215 }
216 216
217 new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot); 217 new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
218 new_pte |= H_PAGE_HASHPTE; 218 new_pte |= H_PAGE_HASHPTE;
219 219
220 *ptep = __pte(new_pte & ~H_PAGE_BUSY); 220 *ptep = __pte(new_pte & ~H_PAGE_BUSY);
@@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
262 } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); 262 } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
263 263
264 rflags = htab_convert_pte_flags(new_pte); 264 rflags = htab_convert_pte_flags(new_pte);
265 rpte = __real_pte(__pte(old_pte), ptep); 265 rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
266 266
267 if (cpu_has_feature(CPU_FTR_NOEXECUTE) && 267 if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
268 !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) 268 !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -327,7 +327,7 @@ repeat:
327 } 327 }
328 328
329 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; 329 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
330 new_pte |= pte_set_hidx(ptep, rpte, 0, slot); 330 new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
331 } 331 }
332 *ptep = __pte(new_pte & ~H_PAGE_BUSY); 332 *ptep = __pte(new_pte & ~H_PAGE_BUSY);
333 return 0; 333 return 0;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 7d07c7e17db6..cf290d415dcd 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1008,6 +1008,7 @@ void __init hash__early_init_mmu(void)
1008 __pmd_index_size = H_PMD_INDEX_SIZE; 1008 __pmd_index_size = H_PMD_INDEX_SIZE;
1009 __pud_index_size = H_PUD_INDEX_SIZE; 1009 __pud_index_size = H_PUD_INDEX_SIZE;
1010 __pgd_index_size = H_PGD_INDEX_SIZE; 1010 __pgd_index_size = H_PGD_INDEX_SIZE;
1011 __pud_cache_index = H_PUD_CACHE_INDEX;
1011 __pmd_cache_index = H_PMD_CACHE_INDEX; 1012 __pmd_cache_index = H_PMD_CACHE_INDEX;
1012 __pte_table_size = H_PTE_TABLE_SIZE; 1013 __pte_table_size = H_PTE_TABLE_SIZE;
1013 __pmd_table_size = H_PMD_TABLE_SIZE; 1014 __pmd_table_size = H_PMD_TABLE_SIZE;
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 12511f5a015f..b320f5097a06 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
27 unsigned long vpn; 27 unsigned long vpn;
28 unsigned long old_pte, new_pte; 28 unsigned long old_pte, new_pte;
29 unsigned long rflags, pa, sz; 29 unsigned long rflags, pa, sz;
30 long slot; 30 long slot, offset;
31 31
32 BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); 32 BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
33 33
@@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
63 } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); 63 } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
64 64
65 rflags = htab_convert_pte_flags(new_pte); 65 rflags = htab_convert_pte_flags(new_pte);
66 rpte = __real_pte(__pte(old_pte), ptep); 66 if (unlikely(mmu_psize == MMU_PAGE_16G))
67 offset = PTRS_PER_PUD;
68 else
69 offset = PTRS_PER_PMD;
70 rpte = __real_pte(__pte(old_pte), ptep, offset);
67 71
68 sz = ((1UL) << shift); 72 sz = ((1UL) << shift);
69 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) 73 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
104 return -1; 108 return -1;
105 } 109 }
106 110
107 new_pte |= pte_set_hidx(ptep, rpte, 0, slot); 111 new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
108 } 112 }
109 113
110 /* 114 /*
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index eb8c6c8c4851..2b656e67f2ea 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -100,6 +100,6 @@ void pgtable_cache_init(void)
100 * same size as either the pgd or pmd index except with THP enabled 100 * same size as either the pgd or pmd index except with THP enabled
101 * on book3s 64 101 * on book3s 64
102 */ 102 */
103 if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) 103 if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
104 pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); 104 pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
105} 105}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 314d19ab9385..edd8d0bc9364 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void)
143 numa_cpu_lookup_table[cpu] = -1; 143 numa_cpu_lookup_table[cpu] = -1;
144} 144}
145 145
146static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
147{
148 numa_cpu_lookup_table[cpu] = node;
149}
150
151static void map_cpu_to_node(int cpu, int node) 146static void map_cpu_to_node(int cpu, int node)
152{ 147{
153 update_numa_cpu_lookup_table(cpu, node); 148 update_numa_cpu_lookup_table(cpu, node);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 573a9a2ee455..2e10a964e290 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -17,9 +17,11 @@
17#include <linux/of_fdt.h> 17#include <linux/of_fdt.h>
18#include <linux/mm.h> 18#include <linux/mm.h>
19#include <linux/string_helpers.h> 19#include <linux/string_helpers.h>
20#include <linux/stop_machine.h>
20 21
21#include <asm/pgtable.h> 22#include <asm/pgtable.h>
22#include <asm/pgalloc.h> 23#include <asm/pgalloc.h>
24#include <asm/mmu_context.h>
23#include <asm/dma.h> 25#include <asm/dma.h>
24#include <asm/machdep.h> 26#include <asm/machdep.h>
25#include <asm/mmu.h> 27#include <asm/mmu.h>
@@ -333,6 +335,22 @@ static void __init radix_init_pgtable(void)
333 "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); 335 "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
334 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 336 asm volatile("eieio; tlbsync; ptesync" : : : "memory");
335 trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1); 337 trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
338
339 /*
340 * The init_mm context is given the first available (non-zero) PID,
341 * which is the "guard PID" and contains no page table. PIDR should
342 * never be set to zero because that duplicates the kernel address
343 * space at the 0x0... offset (quadrant 0)!
344 *
345 * An arbitrary PID that may later be allocated by the PID allocator
346 * for userspace processes must not be used either, because that
347 * would cause stale user mappings for that PID on CPUs outside of
348 * the TLB invalidation scheme (because it won't be in mm_cpumask).
349 *
350 * So permanently carve out one PID for the purpose of a guard PID.
351 */
352 init_mm.context.id = mmu_base_pid;
353 mmu_base_pid++;
336} 354}
337 355
338static void __init radix_init_partition_table(void) 356static void __init radix_init_partition_table(void)
@@ -535,6 +553,7 @@ void __init radix__early_init_mmu(void)
535 __pmd_index_size = RADIX_PMD_INDEX_SIZE; 553 __pmd_index_size = RADIX_PMD_INDEX_SIZE;
536 __pud_index_size = RADIX_PUD_INDEX_SIZE; 554 __pud_index_size = RADIX_PUD_INDEX_SIZE;
537 __pgd_index_size = RADIX_PGD_INDEX_SIZE; 555 __pgd_index_size = RADIX_PGD_INDEX_SIZE;
556 __pud_cache_index = RADIX_PUD_INDEX_SIZE;
538 __pmd_cache_index = RADIX_PMD_INDEX_SIZE; 557 __pmd_cache_index = RADIX_PMD_INDEX_SIZE;
539 __pte_table_size = RADIX_PTE_TABLE_SIZE; 558 __pte_table_size = RADIX_PTE_TABLE_SIZE;
540 __pmd_table_size = RADIX_PMD_TABLE_SIZE; 559 __pmd_table_size = RADIX_PMD_TABLE_SIZE;
@@ -579,7 +598,8 @@ void __init radix__early_init_mmu(void)
579 598
580 radix_init_iamr(); 599 radix_init_iamr();
581 radix_init_pgtable(); 600 radix_init_pgtable();
582 601 /* Switch to the guard PID before turning on MMU */
602 radix__switch_mmu_context(NULL, &init_mm);
583 if (cpu_has_feature(CPU_FTR_HVMODE)) 603 if (cpu_has_feature(CPU_FTR_HVMODE))
584 tlbiel_all(); 604 tlbiel_all();
585} 605}
@@ -604,6 +624,7 @@ void radix__early_init_mmu_secondary(void)
604 } 624 }
605 radix_init_iamr(); 625 radix_init_iamr();
606 626
627 radix__switch_mmu_context(NULL, &init_mm);
607 if (cpu_has_feature(CPU_FTR_HVMODE)) 628 if (cpu_has_feature(CPU_FTR_HVMODE))
608 tlbiel_all(); 629 tlbiel_all();
609} 630}
@@ -666,6 +687,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
666 pud_clear(pud); 687 pud_clear(pud);
667} 688}
668 689
690struct change_mapping_params {
691 pte_t *pte;
692 unsigned long start;
693 unsigned long end;
694 unsigned long aligned_start;
695 unsigned long aligned_end;
696};
697
698static int stop_machine_change_mapping(void *data)
699{
700 struct change_mapping_params *params =
701 (struct change_mapping_params *)data;
702
703 if (!data)
704 return -1;
705
706 spin_unlock(&init_mm.page_table_lock);
707 pte_clear(&init_mm, params->aligned_start, params->pte);
708 create_physical_mapping(params->aligned_start, params->start);
709 create_physical_mapping(params->end, params->aligned_end);
710 spin_lock(&init_mm.page_table_lock);
711 return 0;
712}
713
669static void remove_pte_table(pte_t *pte_start, unsigned long addr, 714static void remove_pte_table(pte_t *pte_start, unsigned long addr,
670 unsigned long end) 715 unsigned long end)
671{ 716{
@@ -694,6 +739,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
694 } 739 }
695} 740}
696 741
742/*
743 * clear the pte and potentially split the mapping helper
744 */
745static void split_kernel_mapping(unsigned long addr, unsigned long end,
746 unsigned long size, pte_t *pte)
747{
748 unsigned long mask = ~(size - 1);
749 unsigned long aligned_start = addr & mask;
750 unsigned long aligned_end = addr + size;
751 struct change_mapping_params params;
752 bool split_region = false;
753
754 if ((end - addr) < size) {
755 /*
 756 * We're going to clear the PTE without having
 757 * flushed the mapping, so we must remap and
 758 * flush. If the effects are visible outside the
 759 * processor, or if we are running in code close
 760 * to the mapping we cleared, we are in trouble.
761 */
762 if (overlaps_kernel_text(aligned_start, addr) ||
763 overlaps_kernel_text(end, aligned_end)) {
764 /*
765 * Hack, just return, don't pte_clear
766 */
767 WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
768 "text, not splitting\n", addr, end);
769 return;
770 }
771 split_region = true;
772 }
773
774 if (split_region) {
775 params.pte = pte;
776 params.start = addr;
777 params.end = end;
778 params.aligned_start = addr & ~(size - 1);
779 params.aligned_end = min_t(unsigned long, aligned_end,
780 (unsigned long)__va(memblock_end_of_DRAM()));
781 stop_machine(stop_machine_change_mapping, &params, NULL);
782 return;
783 }
784
785 pte_clear(&init_mm, addr, pte);
786}
787
697static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, 788static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
698 unsigned long end) 789 unsigned long end)
699{ 790{
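A worked instance of split_kernel_mapping() (hypothetical values): unplugging the middle 2M of a 1G mapping clears the 1G PTE under stop_machine(), then rebuilds the two surviving pieces with smaller pages:

/* Remove [start, start + 2M) from inside a 1G mapping: */
split_kernel_mapping(start, start + SZ_2M, PUD_SIZE, (pte_t *)pudp);
/* -> pte_clear() on the 1G entry, then create_physical_mapping()
 *    for [aligned_start, start) and [end, aligned_end), all while
 *    the other CPUs wait inside stop_machine(). */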
@@ -709,13 +800,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
709 continue; 800 continue;
710 801
711 if (pmd_huge(*pmd)) { 802 if (pmd_huge(*pmd)) {
712 if (!IS_ALIGNED(addr, PMD_SIZE) || 803 split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
713 !IS_ALIGNED(next, PMD_SIZE)) {
714 WARN_ONCE(1, "%s: unaligned range\n", __func__);
715 continue;
716 }
717
718 pte_clear(&init_mm, addr, (pte_t *)pmd);
719 continue; 804 continue;
720 } 805 }
721 806
@@ -740,13 +825,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
740 continue; 825 continue;
741 826
742 if (pud_huge(*pud)) { 827 if (pud_huge(*pud)) {
743 if (!IS_ALIGNED(addr, PUD_SIZE) || 828 split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
744 !IS_ALIGNED(next, PUD_SIZE)) {
745 WARN_ONCE(1, "%s: unaligned range\n", __func__);
746 continue;
747 }
748
749 pte_clear(&init_mm, addr, (pte_t *)pud);
750 continue; 829 continue;
751 } 830 }
752 831
@@ -772,13 +851,7 @@ static void remove_pagetable(unsigned long start, unsigned long end)
772 continue; 851 continue;
773 852
774 if (pgd_huge(*pgd)) { 853 if (pgd_huge(*pgd)) {
775 if (!IS_ALIGNED(addr, PGDIR_SIZE) || 854 split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
776 !IS_ALIGNED(next, PGDIR_SIZE)) {
777 WARN_ONCE(1, "%s: unaligned range\n", __func__);
778 continue;
779 }
780
781 pte_clear(&init_mm, addr, (pte_t *)pgd);
782 continue; 855 continue;
783 } 856 }
784 857
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index c9a623c2d8a2..28c980eb4422 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -82,6 +82,8 @@ unsigned long __pgd_index_size;
82EXPORT_SYMBOL(__pgd_index_size); 82EXPORT_SYMBOL(__pgd_index_size);
83unsigned long __pmd_cache_index; 83unsigned long __pmd_cache_index;
84EXPORT_SYMBOL(__pmd_cache_index); 84EXPORT_SYMBOL(__pmd_cache_index);
85unsigned long __pud_cache_index;
86EXPORT_SYMBOL(__pud_cache_index);
85unsigned long __pte_table_size; 87unsigned long __pte_table_size;
86EXPORT_SYMBOL(__pte_table_size); 88EXPORT_SYMBOL(__pte_table_size);
87unsigned long __pmd_table_size; 89unsigned long __pmd_table_size;
@@ -471,6 +473,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
471 if (old & PATB_HR) { 473 if (old & PATB_HR) {
472 asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : 474 asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
473 "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); 475 "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
476 asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
477 "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
474 trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1); 478 trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
475 } else { 479 } else {
476 asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : 480 asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 881ebd53ffc2..9b23f12e863c 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
51 unsigned int psize; 51 unsigned int psize;
52 int ssize; 52 int ssize;
53 real_pte_t rpte; 53 real_pte_t rpte;
54 int i; 54 int i, offset;
55 55
56 i = batch->index; 56 i = batch->index;
57 57
@@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
67 psize = get_slice_psize(mm, addr); 67 psize = get_slice_psize(mm, addr);
68 /* Mask the address for the correct page size */ 68 /* Mask the address for the correct page size */
69 addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1); 69 addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
70 if (unlikely(psize == MMU_PAGE_16G))
71 offset = PTRS_PER_PUD;
72 else
73 offset = PTRS_PER_PMD;
70#else 74#else
71 BUG(); 75 BUG();
72 psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ 76 psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
@@ -78,6 +82,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
78 * support 64k pages, this might be different from the 82 * support 64k pages, this might be different from the
79 * hardware page size encoded in the slice table. */ 83 * hardware page size encoded in the slice table. */
80 addr &= PAGE_MASK; 84 addr &= PAGE_MASK;
85 offset = PTRS_PER_PTE;
81 } 86 }
82 87
83 88
@@ -91,7 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
91 } 96 }
92 WARN_ON(vsid == 0); 97 WARN_ON(vsid == 0);
93 vpn = hpt_vpn(addr, vsid, ssize); 98 vpn = hpt_vpn(addr, vsid, ssize);
94 rpte = __real_pte(__pte(pte), ptep); 99 rpte = __real_pte(__pte(pte), ptep, offset);
95 100
96 /* 101 /*
97 * Check if we have an active batch on this CPU. If not, just 102 * Check if we have an active batch on this CPU. If not, just
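The offset passed to __real_pte()/pte_set_hidx() encodes where the hint half lives relative to the PTE, which depends on the level holding it. The series open-codes the choice at each call site; gathered in one place it would read (illustrative helper, not added by the patch):

static inline int hidx_offset(int psize)
{
        if (psize == MMU_PAGE_16G)      /* hugetlb PTE at PUD level */
                return PTRS_PER_PUD;
        if (mmu_psize_defs[psize].shift > PAGE_SHIFT)
                return PTRS_PER_PMD;    /* other hugepage sizes */
        return PTRS_PER_PTE;            /* regular PTE page */
}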
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index dd4c9b8b8a81..f6f55ab4980e 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -199,9 +199,11 @@ static void disable_nest_pmu_counters(void)
199 const struct cpumask *l_cpumask; 199 const struct cpumask *l_cpumask;
200 200
201 get_online_cpus(); 201 get_online_cpus();
202 for_each_online_node(nid) { 202 for_each_node_with_cpus(nid) {
203 l_cpumask = cpumask_of_node(nid); 203 l_cpumask = cpumask_of_node(nid);
204 cpu = cpumask_first(l_cpumask); 204 cpu = cpumask_first_and(l_cpumask, cpu_online_mask);
205 if (cpu >= nr_cpu_ids)
206 continue;
205 opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, 207 opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
206 get_hard_smp_processor_id(cpu)); 208 get_hard_smp_processor_id(cpu));
207 } 209 }
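
The opal-imc fix handles memory-only NUMA nodes: iterating with for_each_node_with_cpus() and intersecting with the online mask keeps cpumask_first() from returning nr_cpu_ids and feeding a bogus index to get_hard_smp_processor_id(). The guard in miniature, with plain bitmaps standing in for struct cpumask:

        #define NR_CPU_BITS 64          /* stand-in for nr_cpu_ids */

        /* Mirrors cpumask_first_and(): lowest set bit of the intersection,
         * or NR_CPU_BITS when the node has no online CPU. */
        static int first_and(unsigned long node_mask, unsigned long online_mask)
        {
                unsigned long both = node_mask & online_mask;

                return both ? __builtin_ctzl(both) : NR_CPU_BITS;
        }
        /* Caller: if (first_and(m, o) >= NR_CPU_BITS) continue;  skip node */
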
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 2b3eb01ab110..b7c53a51c31b 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1063,16 +1063,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
1063 rc = PTR_ERR(txwin->paste_kaddr); 1063 rc = PTR_ERR(txwin->paste_kaddr);
1064 goto free_window; 1064 goto free_window;
1065 } 1065 }
1066 } else {
1067 /*
1068 * A user mapping must ensure that context switch issues
1069 * CP_ABORT for this thread.
1070 */
1071 rc = set_thread_uses_vas();
1072 if (rc)
1073 goto free_window;
1066 } 1074 }
1067 1075
1068 /*
1069 * Now that we have a send window, ensure context switch issues
1070 * CP_ABORT for this thread.
1071 */
1072 rc = -EINVAL;
1073 if (set_thread_uses_vas() < 0)
1074 goto free_window;
1075
1076 set_vinst_win(vinst, txwin); 1076 set_vinst_win(vinst, txwin);
1077 1077
1078 return txwin; 1078 return txwin;
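
The vas-window.c change narrows when the CP_ABORT context-switch hook is armed: only a window with a user-space paste mapping needs it, so set_thread_uses_vas() moves into the user-mapping branch and the unconditional rc = -EINVAL sequence goes away. The resulting control flow, condensed into a sketch with a stub in place of the kernel helper:

        static int set_thread_uses_vas_stub(void) { return 0; }

        static int tx_win_open_tail(int user_win)
        {
                if (user_win) {
                        /* Only a user paste mapping needs CP_ABORT. */
                        int rc = set_thread_uses_vas_stub();

                        if (rc)
                                return rc;      /* -> goto free_window */
                }
                return 0;                       /* -> set_vinst_win() */
        }
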
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index dceb51454d8d..652d3e96b812 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -36,6 +36,7 @@
36#include <asm/xics.h> 36#include <asm/xics.h>
37#include <asm/xive.h> 37#include <asm/xive.h>
38#include <asm/plpar_wrappers.h> 38#include <asm/plpar_wrappers.h>
39#include <asm/topology.h>
39 40
40#include "pseries.h" 41#include "pseries.h"
41#include "offline_states.h" 42#include "offline_states.h"
@@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np)
331 BUG_ON(cpu_online(cpu)); 332 BUG_ON(cpu_online(cpu));
332 set_cpu_present(cpu, false); 333 set_cpu_present(cpu, false);
333 set_hard_smp_processor_id(cpu, -1); 334 set_hard_smp_processor_id(cpu, -1);
335 update_numa_cpu_lookup_table(cpu, -1);
334 break; 336 break;
335 } 337 }
336 if (cpu >= nr_cpu_ids) 338 if (cpu >= nr_cpu_ids)
@@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np)
340 cpu_maps_update_done(); 342 cpu_maps_update_done();
341} 343}
342 344
343extern int find_and_online_cpu_nid(int cpu);
344
345static int dlpar_online_cpu(struct device_node *dn) 345static int dlpar_online_cpu(struct device_node *dn)
346{ 346{
347 int rc = 0; 347 int rc = 0;
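
pseries_remove_processor() now also invalidates the CPU's NUMA lookup entry, so a later DLPAR re-add resolves the node afresh instead of inheriting a stale mapping. A minimal sketch of that bookkeeping, with a stand-in table in place of the kernel's:

        #define NR_CPUS_SKETCH 2048

        /* -1 means "no node"; removal clears the entry so a later add
         * re-resolves the CPU's node instead of reusing a stale one. */
        static int numa_cpu_lookup[NR_CPUS_SKETCH];

        static void update_numa_cpu_lookup_sketch(int cpu, int node)
        {
                numa_cpu_lookup[cpu] = node;    /* node == -1 on removal */
        }
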
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 81d8614e7379..5e1ef9150182 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -49,6 +49,28 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
49 49
50 50
51/* 51/*
52 * Enable the hotplug interrupt late because processing them may touch other
53 * devices or systems (e.g. hugepages) that have not been initialized at the
54 * subsys stage.
55 */
56int __init init_ras_hotplug_IRQ(void)
57{
58 struct device_node *np;
59
60 /* Hotplug Events */
61 np = of_find_node_by_path("/event-sources/hot-plug-events");
62 if (np != NULL) {
63 if (dlpar_workqueue_init() == 0)
64 request_event_sources_irqs(np, ras_hotplug_interrupt,
65 "RAS_HOTPLUG");
66 of_node_put(np);
67 }
68
69 return 0;
70}
71machine_late_initcall(pseries, init_ras_hotplug_IRQ);
72
73/*
52 * Initialize handlers for the set of interrupts caused by hardware errors 74 * Initialize handlers for the set of interrupts caused by hardware errors
53 * and power system events. 75 * and power system events.
54 */ 76 */
@@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void)
66 of_node_put(np); 88 of_node_put(np);
67 } 89 }
68 90
69 /* Hotplug Events */
70 np = of_find_node_by_path("/event-sources/hot-plug-events");
71 if (np != NULL) {
72 if (dlpar_workqueue_init() == 0)
73 request_event_sources_irqs(np, ras_hotplug_interrupt,
74 "RAS_HOTPLUG");
75 of_node_put(np);
76 }
77
78 /* EPOW Events */ 91 /* EPOW Events */
79 np = of_find_node_by_path("/event-sources/epow-events"); 92 np = of_find_node_by_path("/event-sources/epow-events");
80 if (np != NULL) { 93 if (np != NULL) {
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index d9c4c9366049..091f1d0d0af1 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
356 356
357 rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size); 357 rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
358 if (rc) { 358 if (rc) {
359 pr_err("Error %lld getting queue info prio %d\n", rc, prio); 359 pr_err("Error %lld getting queue info CPU %d prio %d\n", rc,
360 target, prio);
360 rc = -EIO; 361 rc = -EIO;
361 goto fail; 362 goto fail;
362 } 363 }
@@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
370 /* Configure and enable the queue in HW */ 371 /* Configure and enable the queue in HW */
371 rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order); 372 rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
372 if (rc) { 373 if (rc) {
373 pr_err("Error %lld setting queue for prio %d\n", rc, prio); 374 pr_err("Error %lld setting queue for CPU %d prio %d\n", rc,
375 target, prio);
374 rc = -EIO; 376 rc = -EIO;
375 } else { 377 } else {
376 q->qpage = qpage; 378 q->qpage = qpage;
@@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
389 if (IS_ERR(qpage)) 391 if (IS_ERR(qpage))
390 return PTR_ERR(qpage); 392 return PTR_ERR(qpage);
391 393
392 return xive_spapr_configure_queue(cpu, q, prio, qpage, 394 return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
393 xive_queue_shift); 395 q, prio, qpage, xive_queue_shift);
394} 396}
395 397
396static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, 398static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
@@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
399 struct xive_q *q = &xc->queue[prio]; 401 struct xive_q *q = &xc->queue[prio];
400 unsigned int alloc_order; 402 unsigned int alloc_order;
401 long rc; 403 long rc;
404 int hw_cpu = get_hard_smp_processor_id(cpu);
402 405
403 rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0); 406 rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
404 if (rc) 407 if (rc)
405 pr_err("Error %ld setting queue for prio %d\n", rc, prio); 408 pr_err("Error %ld setting queue for CPU %d prio %d\n", rc,
409 hw_cpu, prio);
406 410
407 alloc_order = xive_alloc_order(xive_queue_shift); 411 alloc_order = xive_alloc_order(xive_queue_shift);
408 free_pages((unsigned long)q->qpage, alloc_order); 412 free_pages((unsigned long)q->qpage, alloc_order);
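
All three spapr.c hunks fix the same class of bug: the plpar_int_* hypervisor calls take the hard (physical) processor number, while the callers were passing Linux's logical CPU id. A condensed sketch of the translation now done at the call boundary, with a stand-in array for the paca-based lookup behind get_hard_smp_processor_id():

        #define NR_CPUS_SKETCH 2048

        static int hard_id[NR_CPUS_SKETCH];     /* logical -> hard mapping */

        static long plpar_set_queue_stub(int hw_cpu, unsigned char prio)
        {
                (void)hw_cpu; (void)prio;
                return 0;
        }

        static long cleanup_queue_sketch(int cpu, unsigned char prio)
        {
                int hw_cpu = hard_id[cpu];      /* translate before the hcall */

                return plpar_set_queue_stub(hw_cpu, prio);
        }
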
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 6bf594ace663..8767e45f1b2b 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -430,6 +430,8 @@ config SPARC_LEON
430 depends on SPARC32 430 depends on SPARC32
431 select USB_EHCI_BIG_ENDIAN_MMIO 431 select USB_EHCI_BIG_ENDIAN_MMIO
432 select USB_EHCI_BIG_ENDIAN_DESC 432 select USB_EHCI_BIG_ENDIAN_DESC
433 select USB_UHCI_BIG_ENDIAN_MMIO
434 select USB_UHCI_BIG_ENDIAN_DESC
433 ---help--- 435 ---help---
434 If you say Y here if you are running on a SPARC-LEON processor. 436 If you say Y here if you are running on a SPARC-LEON processor.
435 The LEON processor is a synthesizable VHDL model of the 437 The LEON processor is a synthesizable VHDL model of the
diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore
index aff152c87cf4..5a82bac5e0bc 100644
--- a/arch/x86/.gitignore
+++ b/arch/x86/.gitignore
@@ -1,6 +1,7 @@
1boot/compressed/vmlinux 1boot/compressed/vmlinux
2tools/test_get_len 2tools/test_get_len
3tools/insn_sanity 3tools/insn_sanity
4tools/insn_decoder_test
4purgatory/kexec-purgatory.c 5purgatory/kexec-purgatory.c
5purgatory/purgatory.ro 6purgatory/purgatory.ro
6 7
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 63bf349b2b24..c1236b187824 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -423,12 +423,6 @@ config X86_MPPARSE
423 For old smp systems that do not have proper acpi support. Newer systems 423 For old smp systems that do not have proper acpi support. Newer systems
424 (esp with 64bit cpus) with acpi support, MADT and DSDT will override it 424 (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
425 425
426config X86_BIGSMP
427 bool "Support for big SMP systems with more than 8 CPUs"
428 depends on X86_32 && SMP
429 ---help---
430 This option is needed for the systems that have more than 8 CPUs
431
432config GOLDFISH 426config GOLDFISH
433 def_bool y 427 def_bool y
434 depends on X86_GOLDFISH 428 depends on X86_GOLDFISH
@@ -460,6 +454,12 @@ config INTEL_RDT
460 Say N if unsure. 454 Say N if unsure.
461 455
462if X86_32 456if X86_32
457config X86_BIGSMP
458 bool "Support for big SMP systems with more than 8 CPUs"
459 depends on SMP
460 ---help---
461 This option is needed for the systems that have more than 8 CPUs
462
463config X86_EXTENDED_PLATFORM 463config X86_EXTENDED_PLATFORM
464 bool "Support for extended (non-PC) x86 platforms" 464 bool "Support for extended (non-PC) x86 platforms"
465 default y 465 default y
@@ -949,25 +949,66 @@ config MAXSMP
949 Enable maximum number of CPUS and NUMA Nodes for this architecture. 949 Enable maximum number of CPUS and NUMA Nodes for this architecture.
950 If unsure, say N. 950 If unsure, say N.
951 951
952#
953# The maximum number of CPUs supported:
954#
955# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
956# and which can be configured interactively in the
957# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
958#
959# The ranges are different on 32-bit and 64-bit kernels, depending on
960# hardware capabilities and scalability features of the kernel.
961#
962# ( If MAXSMP is enabled we just use the highest possible value and disable
963# interactive configuration. )
964#
965
966config NR_CPUS_RANGE_BEGIN
967 int
968 default NR_CPUS_RANGE_END if MAXSMP
969 default 1 if !SMP
970 default 2
971
972config NR_CPUS_RANGE_END
973 int
974 depends on X86_32
975 default 64 if SMP && X86_BIGSMP
976 default 8 if SMP && !X86_BIGSMP
977 default 1 if !SMP
978
979config NR_CPUS_RANGE_END
980 int
981 depends on X86_64
982 default 8192 if SMP && ( MAXSMP || CPUMASK_OFFSTACK)
983 default 512 if SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
984 default 1 if !SMP
985
986config NR_CPUS_DEFAULT
987 int
988 depends on X86_32
989 default 32 if X86_BIGSMP
990 default 8 if SMP
991 default 1 if !SMP
992
993config NR_CPUS_DEFAULT
994 int
995 depends on X86_64
996 default 8192 if MAXSMP
997 default 64 if SMP
998 default 1 if !SMP
999
952config NR_CPUS 1000config NR_CPUS
953 int "Maximum number of CPUs" if SMP && !MAXSMP 1001 int "Maximum number of CPUs" if SMP && !MAXSMP
954 range 2 8 if SMP && X86_32 && !X86_BIGSMP 1002 range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
955 range 2 64 if SMP && X86_32 && X86_BIGSMP 1003 default NR_CPUS_DEFAULT
956 range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
957 range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
958 default "1" if !SMP
959 default "8192" if MAXSMP
960 default "32" if SMP && X86_BIGSMP
961 default "8" if SMP && X86_32
962 default "64" if SMP
963 ---help--- 1004 ---help---
964 This allows you to specify the maximum number of CPUs which this 1005 This allows you to specify the maximum number of CPUs which this
965 kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum 1006 kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum
966 supported value is 8192, otherwise the maximum value is 512. The 1007 supported value is 8192, otherwise the maximum value is 512. The
967 minimum value which makes sense is 2. 1008 minimum value which makes sense is 2.
968 1009
969 This is purely to save memory - each supported CPU adds 1010 This is purely to save memory: each supported CPU adds about 8KB
970 approximately eight kilobytes to the kernel image. 1011 to the kernel image.
971 1012
972config SCHED_SMT 1013config SCHED_SMT
973 bool "SMT (Hyperthreading) scheduler support" 1014 bool "SMT (Hyperthreading) scheduler support"
@@ -1363,7 +1404,7 @@ config HIGHMEM4G
1363 1404
1364config HIGHMEM64G 1405config HIGHMEM64G
1365 bool "64GB" 1406 bool "64GB"
1366 depends on !M486 1407 depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !MWINCHIP3D && !MK6
1367 select X86_PAE 1408 select X86_PAE
1368 ---help--- 1409 ---help---
1369 Select this if you have a 32-bit processor and more than 4 1410 Select this if you have a 32-bit processor and more than 4
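
Two separate x86 Kconfig cleanups end here: HIGHMEM64G is masked off for CPU types that can never have PAE, and, above it, the NR_CPUS ranges and defaults are factored out of one overloaded option into computed NR_CPUS_RANGE_*/NR_CPUS_DEFAULT symbols. The decision table for the latter, transcribed into C (assumed booleans, not Kconfig syntax) so the cases are easy to audit:

        static int nr_cpus_range_end(int smp, int x86_64, int bigsmp,
                                     int offstack, int maxsmp)
        {
                if (!smp)
                        return 1;
                if (!x86_64)
                        return bigsmp ? 64 : 8;
                return (maxsmp || offstack) ? 8192 : 512;
        }

        static int nr_cpus_default(int smp, int x86_64, int bigsmp, int maxsmp)
        {
                if (!smp)
                        return 1;
                if (!x86_64)
                        return bigsmp ? 32 : 8;
                return maxsmp ? 8192 : 64;
        }
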
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 65a9a4716e34..8b8d2297d486 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -374,7 +374,7 @@ config X86_TSC
374 374
375config X86_CMPXCHG64 375config X86_CMPXCHG64
376 def_bool y 376 def_bool y
377 depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM 377 depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8
378 378
379# this should be set for all -march=.. options where the compiler 379# this should be set for all -march=.. options where the compiler
380# generates cmov. 380# generates cmov.
@@ -385,7 +385,7 @@ config X86_CMOV
385config X86_MINIMUM_CPU_FAMILY 385config X86_MINIMUM_CPU_FAMILY
386 int 386 int
387 default "64" if X86_64 387 default "64" if X86_64
388 default "6" if X86_32 && X86_P6_NOP 388 default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
389 default "5" if X86_32 && X86_CMPXCHG64 389 default "5" if X86_32 && X86_CMPXCHG64
390 default "4" 390 default "4"
391 391
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
index 36870b26067a..d08805032f01 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
@@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
57{ 57{
58 unsigned int j; 58 unsigned int j;
59 59
60 state->lens[0] = 0; 60 /* initially all lanes are unused */
61 state->lens[1] = 1; 61 state->lens[0] = 0xFFFFFFFF00000000;
62 state->lens[2] = 2; 62 state->lens[1] = 0xFFFFFFFF00000001;
63 state->lens[3] = 3; 63 state->lens[2] = 0xFFFFFFFF00000002;
64 state->lens[3] = 0xFFFFFFFF00000003;
65
64 state->unused_lanes = 0xFF03020100; 66 state->unused_lanes = 0xFF03020100;
65 for (j = 0; j < 4; j++) 67 for (j = 0; j < 4; j++)
66 state->ldata[j].job_in_lane = NULL; 68 state->ldata[j].job_in_lane = NULL;
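
The sha512-mb fix marks every lane as unused at init time by setting the upper 32 bits of each length word; the low half still carries the lane index. Without the sentinel the manager could pick an idle lane as the "shortest job" and flush garbage. A sketch of the encoding, with a hypothetical helper name:

        #include <stdint.h>

        #define SHA512_MB_LANE_UNUSED 0xFFFFFFFFull   /* upper-half sentinel */

        static uint64_t sha512_mb_lane_init(unsigned int idx)
        {
                /* Yields 0xFFFFFFFF00000000 | idx, matching the hunk. */
                return (SHA512_MB_LANE_UNUSED << 32) | idx;
        }

Note that state->unused_lanes is untouched; only the per-lane length words needed the sentinel.
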
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 3f48f695d5e6..dce7092ab24a 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -97,80 +97,69 @@ For 32-bit we have the following conventions - kernel is built with
97 97
98#define SIZEOF_PTREGS 21*8 98#define SIZEOF_PTREGS 21*8
99 99
100 .macro ALLOC_PT_GPREGS_ON_STACK 100.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
101 addq $-(15*8), %rsp 101 /*
102 .endm 102 * Push registers and sanitize registers of values that a
103 * speculation attack might otherwise want to exploit. The
104 * lower registers are likely clobbered well before they
105 * could be put to use in a speculative execution gadget.
106 * Interleave XOR with PUSH for better uop scheduling:
107 */
108 pushq %rdi /* pt_regs->di */
109 pushq %rsi /* pt_regs->si */
110 pushq \rdx /* pt_regs->dx */
111 pushq %rcx /* pt_regs->cx */
112 pushq \rax /* pt_regs->ax */
113 pushq %r8 /* pt_regs->r8 */
114 xorq %r8, %r8 /* nospec r8 */
115 pushq %r9 /* pt_regs->r9 */
116 xorq %r9, %r9 /* nospec r9 */
117 pushq %r10 /* pt_regs->r10 */
118 xorq %r10, %r10 /* nospec r10 */
119 pushq %r11 /* pt_regs->r11 */
120 xorq %r11, %r11 /* nospec r11*/
121 pushq %rbx /* pt_regs->rbx */
122 xorl %ebx, %ebx /* nospec rbx*/
123 pushq %rbp /* pt_regs->rbp */
124 xorl %ebp, %ebp /* nospec rbp*/
125 pushq %r12 /* pt_regs->r12 */
126 xorq %r12, %r12 /* nospec r12*/
127 pushq %r13 /* pt_regs->r13 */
128 xorq %r13, %r13 /* nospec r13*/
129 pushq %r14 /* pt_regs->r14 */
130 xorq %r14, %r14 /* nospec r14*/
131 pushq %r15 /* pt_regs->r15 */
132 xorq %r15, %r15 /* nospec r15*/
133 UNWIND_HINT_REGS
134.endm
103 135
104 .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 136.macro POP_REGS pop_rdi=1 skip_r11rcx=0
105 .if \r11
106 movq %r11, 6*8+\offset(%rsp)
107 .endif
108 .if \r8910
109 movq %r10, 7*8+\offset(%rsp)
110 movq %r9, 8*8+\offset(%rsp)
111 movq %r8, 9*8+\offset(%rsp)
112 .endif
113 .if \rax
114 movq %rax, 10*8+\offset(%rsp)
115 .endif
116 .if \rcx
117 movq %rcx, 11*8+\offset(%rsp)
118 .endif
119 movq %rdx, 12*8+\offset(%rsp)
120 movq %rsi, 13*8+\offset(%rsp)
121 movq %rdi, 14*8+\offset(%rsp)
122 UNWIND_HINT_REGS offset=\offset extra=0
123 .endm
124 .macro SAVE_C_REGS offset=0
125 SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
126 .endm
127 .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
128 SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
129 .endm
130 .macro SAVE_C_REGS_EXCEPT_R891011
131 SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
132 .endm
133 .macro SAVE_C_REGS_EXCEPT_RCX_R891011
134 SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
135 .endm
136 .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
137 SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
138 .endm
139
140 .macro SAVE_EXTRA_REGS offset=0
141 movq %r15, 0*8+\offset(%rsp)
142 movq %r14, 1*8+\offset(%rsp)
143 movq %r13, 2*8+\offset(%rsp)
144 movq %r12, 3*8+\offset(%rsp)
145 movq %rbp, 4*8+\offset(%rsp)
146 movq %rbx, 5*8+\offset(%rsp)
147 UNWIND_HINT_REGS offset=\offset
148 .endm
149
150 .macro POP_EXTRA_REGS
151 popq %r15 137 popq %r15
152 popq %r14 138 popq %r14
153 popq %r13 139 popq %r13
154 popq %r12 140 popq %r12
155 popq %rbp 141 popq %rbp
156 popq %rbx 142 popq %rbx
157 .endm 143 .if \skip_r11rcx
158 144 popq %rsi
159 .macro POP_C_REGS 145 .else
160 popq %r11 146 popq %r11
147 .endif
161 popq %r10 148 popq %r10
162 popq %r9 149 popq %r9
163 popq %r8 150 popq %r8
164 popq %rax 151 popq %rax
152 .if \skip_r11rcx
153 popq %rsi
154 .else
165 popq %rcx 155 popq %rcx
156 .endif
166 popq %rdx 157 popq %rdx
167 popq %rsi 158 popq %rsi
159 .if \pop_rdi
168 popq %rdi 160 popq %rdi
169 .endm 161 .endif
170 162.endm
171 .macro icebp
172 .byte 0xf1
173 .endm
174 163
175/* 164/*
176 * This is a sneaky trick to help the unwinder find pt_regs on the stack. The 165 * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
@@ -178,7 +167,7 @@ For 32-bit we have the following conventions - kernel is built with
178 * is just setting the LSB, which makes it an invalid stack address and is also 167 * is just setting the LSB, which makes it an invalid stack address and is also
179 * a signal to the unwinder that it's a pt_regs pointer in disguise. 168 * a signal to the unwinder that it's a pt_regs pointer in disguise.
180 * 169 *
181 * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts 170 * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
182 * the original rbp. 171 * the original rbp.
183 */ 172 */
184.macro ENCODE_FRAME_POINTER ptregs_offset=0 173.macro ENCODE_FRAME_POINTER ptregs_offset=0
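
PUSH_AND_CLEAR_REGS interleaves a zeroing XOR after each PUSH so no stale user-controlled value survives in a register where a speculative-execution gadget could consume it, and the interleaving schedules better than a block of pushes followed by a block of xors. Note the mix of xorq and xorl: on x86-64 a 32-bit xor already clears bits 63:32 and encodes shorter for the legacy registers. A user-space illustration of the zeroing idiom (GCC inline asm; %k selects the 32-bit register name):

        #include <stdio.h>

        static unsigned long zero_reg(void)
        {
                unsigned long v;

                /* xor of a register with itself is the canonical zeroing
                 * idiom; the 32-bit form also clears the upper half. */
                asm ("xorl %k0, %k0" : "=r" (v));
                return v;
        }

        int main(void)
        {
                printf("%lu\n", zero_reg());    /* prints 0 */
                return 0;
        }
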
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 30c8c5344c4a..8971bd64d515 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -213,7 +213,7 @@ ENTRY(entry_SYSCALL_64)
213 213
214 swapgs 214 swapgs
215 /* 215 /*
216 * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it 216 * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
217 * is not required to switch CR3. 217 * is not required to switch CR3.
218 */ 218 */
219 movq %rsp, PER_CPU_VAR(rsp_scratch) 219 movq %rsp, PER_CPU_VAR(rsp_scratch)
@@ -227,22 +227,8 @@ ENTRY(entry_SYSCALL_64)
227 pushq %rcx /* pt_regs->ip */ 227 pushq %rcx /* pt_regs->ip */
228GLOBAL(entry_SYSCALL_64_after_hwframe) 228GLOBAL(entry_SYSCALL_64_after_hwframe)
229 pushq %rax /* pt_regs->orig_ax */ 229 pushq %rax /* pt_regs->orig_ax */
230 pushq %rdi /* pt_regs->di */ 230
231 pushq %rsi /* pt_regs->si */ 231 PUSH_AND_CLEAR_REGS rax=$-ENOSYS
232 pushq %rdx /* pt_regs->dx */
233 pushq %rcx /* pt_regs->cx */
234 pushq $-ENOSYS /* pt_regs->ax */
235 pushq %r8 /* pt_regs->r8 */
236 pushq %r9 /* pt_regs->r9 */
237 pushq %r10 /* pt_regs->r10 */
238 pushq %r11 /* pt_regs->r11 */
239 pushq %rbx /* pt_regs->rbx */
240 pushq %rbp /* pt_regs->rbp */
241 pushq %r12 /* pt_regs->r12 */
242 pushq %r13 /* pt_regs->r13 */
243 pushq %r14 /* pt_regs->r14 */
244 pushq %r15 /* pt_regs->r15 */
245 UNWIND_HINT_REGS
246 232
247 TRACE_IRQS_OFF 233 TRACE_IRQS_OFF
248 234
@@ -321,15 +307,7 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
321syscall_return_via_sysret: 307syscall_return_via_sysret:
322 /* rcx and r11 are already restored (see code above) */ 308 /* rcx and r11 are already restored (see code above) */
323 UNWIND_HINT_EMPTY 309 UNWIND_HINT_EMPTY
324 POP_EXTRA_REGS 310 POP_REGS pop_rdi=0 skip_r11rcx=1
325 popq %rsi /* skip r11 */
326 popq %r10
327 popq %r9
328 popq %r8
329 popq %rax
330 popq %rsi /* skip rcx */
331 popq %rdx
332 popq %rsi
333 311
334 /* 312 /*
335 * Now all regs are restored except RSP and RDI. 313 * Now all regs are restored except RSP and RDI.
@@ -559,9 +537,7 @@ END(irq_entries_start)
559 call switch_to_thread_stack 537 call switch_to_thread_stack
5601: 5381:
561 539
562 ALLOC_PT_GPREGS_ON_STACK 540 PUSH_AND_CLEAR_REGS
563 SAVE_C_REGS
564 SAVE_EXTRA_REGS
565 ENCODE_FRAME_POINTER 541 ENCODE_FRAME_POINTER
566 542
567 testb $3, CS(%rsp) 543 testb $3, CS(%rsp)
@@ -622,15 +598,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
622 ud2 598 ud2
6231: 5991:
624#endif 600#endif
625 POP_EXTRA_REGS 601 POP_REGS pop_rdi=0
626 popq %r11
627 popq %r10
628 popq %r9
629 popq %r8
630 popq %rax
631 popq %rcx
632 popq %rdx
633 popq %rsi
634 602
635 /* 603 /*
636 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS. 604 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
@@ -688,8 +656,7 @@ GLOBAL(restore_regs_and_return_to_kernel)
688 ud2 656 ud2
6891: 6571:
690#endif 658#endif
691 POP_EXTRA_REGS 659 POP_REGS
692 POP_C_REGS
693 addq $8, %rsp /* skip regs->orig_ax */ 660 addq $8, %rsp /* skip regs->orig_ax */
694 /* 661 /*
695 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization 662 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -908,7 +875,9 @@ ENTRY(\sym)
908 pushq $-1 /* ORIG_RAX: no syscall to restart */ 875 pushq $-1 /* ORIG_RAX: no syscall to restart */
909 .endif 876 .endif
910 877
911 ALLOC_PT_GPREGS_ON_STACK 878 /* Save all registers in pt_regs */
879 PUSH_AND_CLEAR_REGS
880 ENCODE_FRAME_POINTER
912 881
913 .if \paranoid < 2 882 .if \paranoid < 2
914 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ 883 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
@@ -1121,9 +1090,7 @@ ENTRY(xen_failsafe_callback)
1121 addq $0x30, %rsp 1090 addq $0x30, %rsp
1122 UNWIND_HINT_IRET_REGS 1091 UNWIND_HINT_IRET_REGS
1123 pushq $-1 /* orig_ax = -1 => not a system call */ 1092 pushq $-1 /* orig_ax = -1 => not a system call */
1124 ALLOC_PT_GPREGS_ON_STACK 1093 PUSH_AND_CLEAR_REGS
1125 SAVE_C_REGS
1126 SAVE_EXTRA_REGS
1127 ENCODE_FRAME_POINTER 1094 ENCODE_FRAME_POINTER
1128 jmp error_exit 1095 jmp error_exit
1129END(xen_failsafe_callback) 1096END(xen_failsafe_callback)
@@ -1163,16 +1130,13 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
1163#endif 1130#endif
1164 1131
1165/* 1132/*
1166 * Save all registers in pt_regs, and switch gs if needed. 1133 * Switch gs if needed.
1167 * Use slow, but surefire "are we in kernel?" check. 1134 * Use slow, but surefire "are we in kernel?" check.
1168 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise 1135 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
1169 */ 1136 */
1170ENTRY(paranoid_entry) 1137ENTRY(paranoid_entry)
1171 UNWIND_HINT_FUNC 1138 UNWIND_HINT_FUNC
1172 cld 1139 cld
1173 SAVE_C_REGS 8
1174 SAVE_EXTRA_REGS 8
1175 ENCODE_FRAME_POINTER 8
1176 movl $1, %ebx 1140 movl $1, %ebx
1177 movl $MSR_GS_BASE, %ecx 1141 movl $MSR_GS_BASE, %ecx
1178 rdmsr 1142 rdmsr
@@ -1211,21 +1175,18 @@ ENTRY(paranoid_exit)
1211 jmp .Lparanoid_exit_restore 1175 jmp .Lparanoid_exit_restore
1212.Lparanoid_exit_no_swapgs: 1176.Lparanoid_exit_no_swapgs:
1213 TRACE_IRQS_IRETQ_DEBUG 1177 TRACE_IRQS_IRETQ_DEBUG
1178 RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
1214.Lparanoid_exit_restore: 1179.Lparanoid_exit_restore:
1215 jmp restore_regs_and_return_to_kernel 1180 jmp restore_regs_and_return_to_kernel
1216END(paranoid_exit) 1181END(paranoid_exit)
1217 1182
1218/* 1183/*
1219 * Save all registers in pt_regs, and switch gs if needed. 1184 * Switch gs if needed.
1220 * Return: EBX=0: came from user mode; EBX=1: otherwise 1185 * Return: EBX=0: came from user mode; EBX=1: otherwise
1221 */ 1186 */
1222ENTRY(error_entry) 1187ENTRY(error_entry)
1223 UNWIND_HINT_FUNC 1188 UNWIND_HINT_REGS offset=8
1224 cld 1189 cld
1225 SAVE_C_REGS 8
1226 SAVE_EXTRA_REGS 8
1227 ENCODE_FRAME_POINTER 8
1228 xorl %ebx, %ebx
1229 testb $3, CS+8(%rsp) 1190 testb $3, CS+8(%rsp)
1230 jz .Lerror_kernelspace 1191 jz .Lerror_kernelspace
1231 1192
@@ -1406,22 +1367,7 @@ ENTRY(nmi)
1406 pushq 1*8(%rdx) /* pt_regs->rip */ 1367 pushq 1*8(%rdx) /* pt_regs->rip */
1407 UNWIND_HINT_IRET_REGS 1368 UNWIND_HINT_IRET_REGS
1408 pushq $-1 /* pt_regs->orig_ax */ 1369 pushq $-1 /* pt_regs->orig_ax */
1409 pushq %rdi /* pt_regs->di */ 1370 PUSH_AND_CLEAR_REGS rdx=(%rdx)
1410 pushq %rsi /* pt_regs->si */
1411 pushq (%rdx) /* pt_regs->dx */
1412 pushq %rcx /* pt_regs->cx */
1413 pushq %rax /* pt_regs->ax */
1414 pushq %r8 /* pt_regs->r8 */
1415 pushq %r9 /* pt_regs->r9 */
1416 pushq %r10 /* pt_regs->r10 */
1417 pushq %r11 /* pt_regs->r11 */
1418 pushq %rbx /* pt_regs->rbx */
1419 pushq %rbp /* pt_regs->rbp */
1420 pushq %r12 /* pt_regs->r12 */
1421 pushq %r13 /* pt_regs->r13 */
1422 pushq %r14 /* pt_regs->r14 */
1423 pushq %r15 /* pt_regs->r15 */
1424 UNWIND_HINT_REGS
1425 ENCODE_FRAME_POINTER 1371 ENCODE_FRAME_POINTER
1426 1372
1427 /* 1373 /*
@@ -1631,7 +1577,8 @@ end_repeat_nmi:
1631 * frame to point back to repeat_nmi. 1577 * frame to point back to repeat_nmi.
1632 */ 1578 */
1633 pushq $-1 /* ORIG_RAX: no syscall to restart */ 1579 pushq $-1 /* ORIG_RAX: no syscall to restart */
1634 ALLOC_PT_GPREGS_ON_STACK 1580 PUSH_AND_CLEAR_REGS
1581 ENCODE_FRAME_POINTER
1635 1582
1636 /* 1583 /*
1637 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit 1584 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
@@ -1655,8 +1602,7 @@ end_repeat_nmi:
1655nmi_swapgs: 1602nmi_swapgs:
1656 SWAPGS_UNSAFE_STACK 1603 SWAPGS_UNSAFE_STACK
1657nmi_restore: 1604nmi_restore:
1658 POP_EXTRA_REGS 1605 POP_REGS
1659 POP_C_REGS
1660 1606
1661 /* 1607 /*
1662 * Skip orig_ax and the "outermost" frame to point RSP at the "iret" 1608 * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
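
The consolidated POP_REGS macro replaces the POP_EXTRA_REGS/POP_C_REGS pair and parameterizes the two special cases visible above: pop_rdi=0 leaves RDI on the stack for the return trampoline, and skip_r11rcx=1 serves SYSRET, which regenerates RCX and R11 from RIP and RFLAGS, so their saved slots are popped into RSI merely to advance RSP. A toy model of that discard pop:

        /* Popping into a scratch register is "load, then bump RSP by 8";
         * using RSI as the sink simply discards the saved r11/rcx values. */
        static unsigned long pop_discard(unsigned long **rsp)
        {
                return *(*rsp)++;       /* caller ignores the result */
        }
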
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 98d5358e4041..fd65e016e413 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,15 +85,25 @@ ENTRY(entry_SYSENTER_compat)
85 pushq %rcx /* pt_regs->cx */ 85 pushq %rcx /* pt_regs->cx */
86 pushq $-ENOSYS /* pt_regs->ax */ 86 pushq $-ENOSYS /* pt_regs->ax */
87 pushq $0 /* pt_regs->r8 = 0 */ 87 pushq $0 /* pt_regs->r8 = 0 */
88 xorq %r8, %r8 /* nospec r8 */
88 pushq $0 /* pt_regs->r9 = 0 */ 89 pushq $0 /* pt_regs->r9 = 0 */
90 xorq %r9, %r9 /* nospec r9 */
89 pushq $0 /* pt_regs->r10 = 0 */ 91 pushq $0 /* pt_regs->r10 = 0 */
92 xorq %r10, %r10 /* nospec r10 */
90 pushq $0 /* pt_regs->r11 = 0 */ 93 pushq $0 /* pt_regs->r11 = 0 */
94 xorq %r11, %r11 /* nospec r11 */
91 pushq %rbx /* pt_regs->rbx */ 95 pushq %rbx /* pt_regs->rbx */
96 xorl %ebx, %ebx /* nospec rbx */
92 pushq %rbp /* pt_regs->rbp (will be overwritten) */ 97 pushq %rbp /* pt_regs->rbp (will be overwritten) */
98 xorl %ebp, %ebp /* nospec rbp */
93 pushq $0 /* pt_regs->r12 = 0 */ 99 pushq $0 /* pt_regs->r12 = 0 */
100 xorq %r12, %r12 /* nospec r12 */
94 pushq $0 /* pt_regs->r13 = 0 */ 101 pushq $0 /* pt_regs->r13 = 0 */
102 xorq %r13, %r13 /* nospec r13 */
95 pushq $0 /* pt_regs->r14 = 0 */ 103 pushq $0 /* pt_regs->r14 = 0 */
104 xorq %r14, %r14 /* nospec r14 */
96 pushq $0 /* pt_regs->r15 = 0 */ 105 pushq $0 /* pt_regs->r15 = 0 */
106 xorq %r15, %r15 /* nospec r15 */
97 cld 107 cld
98 108
99 /* 109 /*
@@ -214,15 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
214 pushq %rbp /* pt_regs->cx (stashed in bp) */ 224 pushq %rbp /* pt_regs->cx (stashed in bp) */
215 pushq $-ENOSYS /* pt_regs->ax */ 225 pushq $-ENOSYS /* pt_regs->ax */
216 pushq $0 /* pt_regs->r8 = 0 */ 226 pushq $0 /* pt_regs->r8 = 0 */
227 xorq %r8, %r8 /* nospec r8 */
217 pushq $0 /* pt_regs->r9 = 0 */ 228 pushq $0 /* pt_regs->r9 = 0 */
229 xorq %r9, %r9 /* nospec r9 */
218 pushq $0 /* pt_regs->r10 = 0 */ 230 pushq $0 /* pt_regs->r10 = 0 */
231 xorq %r10, %r10 /* nospec r10 */
219 pushq $0 /* pt_regs->r11 = 0 */ 232 pushq $0 /* pt_regs->r11 = 0 */
233 xorq %r11, %r11 /* nospec r11 */
220 pushq %rbx /* pt_regs->rbx */ 234 pushq %rbx /* pt_regs->rbx */
235 xorl %ebx, %ebx /* nospec rbx */
221 pushq %rbp /* pt_regs->rbp (will be overwritten) */ 236 pushq %rbp /* pt_regs->rbp (will be overwritten) */
237 xorl %ebp, %ebp /* nospec rbp */
222 pushq $0 /* pt_regs->r12 = 0 */ 238 pushq $0 /* pt_regs->r12 = 0 */
239 xorq %r12, %r12 /* nospec r12 */
223 pushq $0 /* pt_regs->r13 = 0 */ 240 pushq $0 /* pt_regs->r13 = 0 */
241 xorq %r13, %r13 /* nospec r13 */
224 pushq $0 /* pt_regs->r14 = 0 */ 242 pushq $0 /* pt_regs->r14 = 0 */
243 xorq %r14, %r14 /* nospec r14 */
225 pushq $0 /* pt_regs->r15 = 0 */ 244 pushq $0 /* pt_regs->r15 = 0 */
245 xorq %r15, %r15 /* nospec r15 */
226 246
227 /* 247 /*
228 * User mode is traced as though IRQs are on, and SYSENTER 248 * User mode is traced as though IRQs are on, and SYSENTER
@@ -338,15 +358,25 @@ ENTRY(entry_INT80_compat)
338 pushq %rcx /* pt_regs->cx */ 358 pushq %rcx /* pt_regs->cx */
339 pushq $-ENOSYS /* pt_regs->ax */ 359 pushq $-ENOSYS /* pt_regs->ax */
340 pushq $0 /* pt_regs->r8 = 0 */ 360 pushq $0 /* pt_regs->r8 = 0 */
361 xorq %r8, %r8 /* nospec r8 */
341 pushq $0 /* pt_regs->r9 = 0 */ 362 pushq $0 /* pt_regs->r9 = 0 */
363 xorq %r9, %r9 /* nospec r9 */
342 pushq $0 /* pt_regs->r10 = 0 */ 364 pushq $0 /* pt_regs->r10 = 0 */
365 xorq %r10, %r10 /* nospec r10 */
343 pushq $0 /* pt_regs->r11 = 0 */ 366 pushq $0 /* pt_regs->r11 = 0 */
367 xorq %r11, %r11 /* nospec r11 */
344 pushq %rbx /* pt_regs->rbx */ 368 pushq %rbx /* pt_regs->rbx */
369 xorl %ebx, %ebx /* nospec rbx */
345 pushq %rbp /* pt_regs->rbp */ 370 pushq %rbp /* pt_regs->rbp */
371 xorl %ebp, %ebp /* nospec rbp */
346 pushq %r12 /* pt_regs->r12 */ 372 pushq %r12 /* pt_regs->r12 */
373 xorq %r12, %r12 /* nospec r12 */
347 pushq %r13 /* pt_regs->r13 */ 374 pushq %r13 /* pt_regs->r13 */
375 xorq %r13, %r13 /* nospec r13 */
348 pushq %r14 /* pt_regs->r14 */ 376 pushq %r14 /* pt_regs->r14 */
377 xorq %r14, %r14 /* nospec r14 */
349 pushq %r15 /* pt_regs->r15 */ 378 pushq %r15 /* pt_regs->r15 */
379 xorq %r15, %r15 /* nospec r15 */
350 cld 380 cld
351 381
352 /* 382 /*
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 731153a4681e..56457cb73448 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3559,7 +3559,7 @@ static int intel_snb_pebs_broken(int cpu)
3559 break; 3559 break;
3560 3560
3561 case INTEL_FAM6_SANDYBRIDGE_X: 3561 case INTEL_FAM6_SANDYBRIDGE_X:
3562 switch (cpu_data(cpu).x86_mask) { 3562 switch (cpu_data(cpu).x86_stepping) {
3563 case 6: rev = 0x618; break; 3563 case 6: rev = 0x618; break;
3564 case 7: rev = 0x70c; break; 3564 case 7: rev = 0x70c; break;
3565 } 3565 }
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index ae64d0b69729..cf372b90557e 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1186,7 +1186,7 @@ void __init intel_pmu_lbr_init_atom(void)
1186 * on PMU interrupt 1186 * on PMU interrupt
1187 */ 1187 */
1188 if (boot_cpu_data.x86_model == 28 1188 if (boot_cpu_data.x86_model == 28
1189 && boot_cpu_data.x86_mask < 10) { 1189 && boot_cpu_data.x86_stepping < 10) {
1190 pr_cont("LBR disabled due to erratum"); 1190 pr_cont("LBR disabled due to erratum");
1191 return; 1191 return;
1192 } 1192 }
diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c
index a5604c352930..408879b0c0d4 100644
--- a/arch/x86/events/intel/p6.c
+++ b/arch/x86/events/intel/p6.c
@@ -234,7 +234,7 @@ static __initconst const struct x86_pmu p6_pmu = {
234 234
235static __init void p6_pmu_rdpmc_quirk(void) 235static __init void p6_pmu_rdpmc_quirk(void)
236{ 236{
237 if (boot_cpu_data.x86_mask < 9) { 237 if (boot_cpu_data.x86_stepping < 9) {
238 /* 238 /*
239 * PPro erratum 26; fixed in stepping 9 and above. 239 * PPro erratum 26; fixed in stepping 9 and above.
240 */ 240 */
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 44f5d79d5105..11881726ed37 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
94 if (boot_cpu_data.x86 == 0x0F && 94 if (boot_cpu_data.x86 == 0x0F &&
95 boot_cpu_data.x86_vendor == X86_VENDOR_AMD && 95 boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
96 boot_cpu_data.x86_model <= 0x05 && 96 boot_cpu_data.x86_model <= 0x05 &&
97 boot_cpu_data.x86_mask < 0x0A) 97 boot_cpu_data.x86_stepping < 0x0A)
98 return 1; 98 return 1;
99 else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E)) 99 else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E))
100 return 1; 100 return 1;
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 30d406146016..e1259f043ae9 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
40 40
41 asm ("cmp %1,%2; sbb %0,%0;" 41 asm ("cmp %1,%2; sbb %0,%0;"
42 :"=r" (mask) 42 :"=r" (mask)
43 :"r"(size),"r" (index) 43 :"g"(size),"r" (index)
44 :"cc"); 44 :"cc");
45 return mask; 45 return mask;
46} 46}
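
The constraint change from "r" to "g" lets the compiler feed size from a register, memory, or an immediate, saving a register in hot callers; the idiom itself is unchanged: the compare borrows exactly when index < size, and sbb turns that borrow into an all-ones mask. A self-contained user-space version for experimentation:

        #include <assert.h>

        static unsigned long index_mask_nospec(unsigned long index,
                                               unsigned long size)
        {
                unsigned long mask;

                /* index - size sets CF iff index < size; sbb x,x = -CF. */
                asm ("cmp %1,%2; sbb %0,%0"
                     : "=r" (mask)
                     : "g" (size), "r" (index)
                     : "cc");
                return mask;
        }

        int main(void)
        {
                assert(index_mask_nospec(3, 8) == ~0UL); /* in bounds: keep */
                assert(index_mask_nospec(9, 8) == 0);    /* out of bounds: 0 */
                return 0;
        }

A caller then clamps with index &= index_mask_nospec(index, size) before any dependent load.
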
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 34d99af43994..6804d6642767 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -5,23 +5,20 @@
5#include <linux/stringify.h> 5#include <linux/stringify.h>
6 6
7/* 7/*
8 * Since some emulators terminate on UD2, we cannot use it for WARN. 8 * Despite that some emulators terminate on UD2, we use it for WARN().
9 * Since various instruction decoders disagree on the length of UD1,
10 * we cannot use it either. So use UD0 for WARN.
11 * 9 *
12 * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas 10 * Since various instruction decoders/specs disagree on the encoding of
13 * our kernel decoder thinks it takes a ModRM byte, which seems consistent 11 * UD0/UD1.
14 * with various things like the Intel SDM instruction encoding rules)
15 */ 12 */
16 13
17#define ASM_UD0 ".byte 0x0f, 0xff" 14#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */
18#define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */ 15#define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */
19#define ASM_UD2 ".byte 0x0f, 0x0b" 16#define ASM_UD2 ".byte 0x0f, 0x0b"
20 17
21#define INSN_UD0 0xff0f 18#define INSN_UD0 0xff0f
22#define INSN_UD2 0x0b0f 19#define INSN_UD2 0x0b0f
23 20
24#define LEN_UD0 2 21#define LEN_UD2 2
25 22
26#ifdef CONFIG_GENERIC_BUG 23#ifdef CONFIG_GENERIC_BUG
27 24
@@ -77,7 +74,11 @@ do { \
77 unreachable(); \ 74 unreachable(); \
78} while (0) 75} while (0)
79 76
80#define __WARN_FLAGS(flags) _BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags)) 77#define __WARN_FLAGS(flags) \
78do { \
79 _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \
80 annotate_reachable(); \
81} while (0)
81 82
82#include <asm-generic/bug.h> 83#include <asm-generic/bug.h>
83 84
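
Two things happen in the bug.h hunk: WARN switches from UD0 to UD2 because decoders disagree about UD0/UD1 operand bytes, and annotate_reachable() tells objtool that, unlike BUG, execution resumes after the trap once the #UD handler advances RIP by LEN_UD2. A simplified, hypothetical sketch of the handler-side matching that makes the resume work:

        #define INSN_UD2 0x0b0f
        #define LEN_UD2  2

        /* Match the two opcode bytes 0x0f 0x0b at the faulting IP. */
        static int is_ud2_at(const unsigned char *ip)
        {
                return ip[0] == (INSN_UD2 & 0xff) &&
                       ip[1] == (INSN_UD2 >> 8);
        }
        /* On a WARN match the handler in effect does: regs->ip += LEN_UD2; */
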
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 70eddb3922ff..736771c9822e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -148,45 +148,46 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
148 */ 148 */
149static __always_inline __pure bool _static_cpu_has(u16 bit) 149static __always_inline __pure bool _static_cpu_has(u16 bit)
150{ 150{
151 asm_volatile_goto("1: jmp 6f\n" 151 asm_volatile_goto("1: jmp 6f\n"
152 "2:\n" 152 "2:\n"
153 ".skip -(((5f-4f) - (2b-1b)) > 0) * " 153 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
154 "((5f-4f) - (2b-1b)),0x90\n" 154 "((5f-4f) - (2b-1b)),0x90\n"
155 "3:\n" 155 "3:\n"
156 ".section .altinstructions,\"a\"\n" 156 ".section .altinstructions,\"a\"\n"
157 " .long 1b - .\n" /* src offset */ 157 " .long 1b - .\n" /* src offset */
158 " .long 4f - .\n" /* repl offset */ 158 " .long 4f - .\n" /* repl offset */
159 " .word %P1\n" /* always replace */ 159 " .word %P[always]\n" /* always replace */
160 " .byte 3b - 1b\n" /* src len */ 160 " .byte 3b - 1b\n" /* src len */
161 " .byte 5f - 4f\n" /* repl len */ 161 " .byte 5f - 4f\n" /* repl len */
162 " .byte 3b - 2b\n" /* pad len */ 162 " .byte 3b - 2b\n" /* pad len */
163 ".previous\n" 163 ".previous\n"
164 ".section .altinstr_replacement,\"ax\"\n" 164 ".section .altinstr_replacement,\"ax\"\n"
165 "4: jmp %l[t_no]\n" 165 "4: jmp %l[t_no]\n"
166 "5:\n" 166 "5:\n"
167 ".previous\n" 167 ".previous\n"
168 ".section .altinstructions,\"a\"\n" 168 ".section .altinstructions,\"a\"\n"
169 " .long 1b - .\n" /* src offset */ 169 " .long 1b - .\n" /* src offset */
170 " .long 0\n" /* no replacement */ 170 " .long 0\n" /* no replacement */
171 " .word %P0\n" /* feature bit */ 171 " .word %P[feature]\n" /* feature bit */
172 " .byte 3b - 1b\n" /* src len */ 172 " .byte 3b - 1b\n" /* src len */
173 " .byte 0\n" /* repl len */ 173 " .byte 0\n" /* repl len */
174 " .byte 0\n" /* pad len */ 174 " .byte 0\n" /* pad len */
175 ".previous\n" 175 ".previous\n"
176 ".section .altinstr_aux,\"ax\"\n" 176 ".section .altinstr_aux,\"ax\"\n"
177 "6:\n" 177 "6:\n"
178 " testb %[bitnum],%[cap_byte]\n" 178 " testb %[bitnum],%[cap_byte]\n"
179 " jnz %l[t_yes]\n" 179 " jnz %l[t_yes]\n"
180 " jmp %l[t_no]\n" 180 " jmp %l[t_no]\n"
181 ".previous\n" 181 ".previous\n"
182 : : "i" (bit), "i" (X86_FEATURE_ALWAYS), 182 : : [feature] "i" (bit),
183 [bitnum] "i" (1 << (bit & 7)), 183 [always] "i" (X86_FEATURE_ALWAYS),
184 [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) 184 [bitnum] "i" (1 << (bit & 7)),
185 : : t_yes, t_no); 185 [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
186 t_yes: 186 : : t_yes, t_no);
187 return true; 187t_yes:
188 t_no: 188 return true;
189 return false; 189t_no:
190 return false;
190} 191}
191 192
192#define static_cpu_has(bit) \ 193#define static_cpu_has(bit) \
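
The _static_cpu_has() rewrite is mechanical: the positional %P0/%P1 references become named operands, which keeps the long .altinstructions template readable and immune to operand reordering. The GCC feature in miniature:

        static int add_one(int x)
        {
                int y;

                /* [out]/[in] name the operands; the template says %[name]. */
                asm ("lea 1(%[in]), %[out]"
                     : [out] "=r" (y)
                     : [in] "r" (x));
                return y;
        }
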
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 4d57894635f2..76b058533e47 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -6,6 +6,7 @@
6#include <asm/alternative.h> 6#include <asm/alternative.h>
7#include <asm/alternative-asm.h> 7#include <asm/alternative-asm.h>
8#include <asm/cpufeatures.h> 8#include <asm/cpufeatures.h>
9#include <asm/msr-index.h>
9 10
10#ifdef __ASSEMBLY__ 11#ifdef __ASSEMBLY__
11 12
@@ -164,10 +165,15 @@ static inline void vmexit_fill_RSB(void)
164 165
165static inline void indirect_branch_prediction_barrier(void) 166static inline void indirect_branch_prediction_barrier(void)
166{ 167{
167 alternative_input("", 168 asm volatile(ALTERNATIVE("",
168 "call __ibp_barrier", 169 "movl %[msr], %%ecx\n\t"
169 X86_FEATURE_USE_IBPB, 170 "movl %[val], %%eax\n\t"
170 ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory")); 171 "movl $0, %%edx\n\t"
172 "wrmsr",
173 X86_FEATURE_USE_IBPB)
174 : : [msr] "i" (MSR_IA32_PRED_CMD),
175 [val] "i" (PRED_CMD_IBPB)
176 : "eax", "ecx", "edx", "memory");
171} 177}
172 178
173#endif /* __ASSEMBLY__ */ 179#endif /* __ASSEMBLY__ */
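
Inlining the IBPB sequence removes a retpoline-unfriendly call to __ibp_barrier (deleted from processor.h further down): the alternative now emits the WRMSR directly whenever X86_FEATURE_USE_IBPB is set. The underlying MSR write, sketched as a user-space illustration (real PRED_CMD writes require ring 0):

        /* WRMSR takes the MSR index in ECX and the value in EDX:EAX. */
        static inline void wrmsr_sketch(unsigned int msr,
                                        unsigned long long val)
        {
                asm volatile ("wrmsr"
                              : : "c" (msr),
                                  "a" ((unsigned int)val),
                                  "d" ((unsigned int)(val >> 32))
                              : "memory");
        }
        /* e.g. wrmsr_sketch(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); */
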
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 4baa6bceb232..d652a3808065 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -52,10 +52,6 @@ static inline void clear_page(void *page)
52 52
53void copy_page(void *to, void *from); 53void copy_page(void *to, void *from);
54 54
55#ifdef CONFIG_X86_MCE
56#define arch_unmap_kpfn arch_unmap_kpfn
57#endif
58
59#endif /* !__ASSEMBLY__ */ 55#endif /* !__ASSEMBLY__ */
60 56
61#ifdef CONFIG_X86_VSYSCALL_EMULATION 57#ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 892df375b615..554841fab717 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -297,9 +297,9 @@ static inline void __flush_tlb_global(void)
297{ 297{
298 PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel); 298 PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
299} 299}
300static inline void __flush_tlb_single(unsigned long addr) 300static inline void __flush_tlb_one_user(unsigned long addr)
301{ 301{
302 PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); 302 PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr);
303} 303}
304 304
305static inline void flush_tlb_others(const struct cpumask *cpumask, 305static inline void flush_tlb_others(const struct cpumask *cpumask,
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 6ec54d01972d..f624f1f10316 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -217,7 +217,7 @@ struct pv_mmu_ops {
217 /* TLB operations */ 217 /* TLB operations */
218 void (*flush_tlb_user)(void); 218 void (*flush_tlb_user)(void);
219 void (*flush_tlb_kernel)(void); 219 void (*flush_tlb_kernel)(void);
220 void (*flush_tlb_single)(unsigned long addr); 220 void (*flush_tlb_one_user)(unsigned long addr);
221 void (*flush_tlb_others)(const struct cpumask *cpus, 221 void (*flush_tlb_others)(const struct cpumask *cpus,
222 const struct flush_tlb_info *info); 222 const struct flush_tlb_info *info);
223 223
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index e67c0620aec2..e55466760ff8 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -61,7 +61,7 @@ void paging_init(void);
61#define kpte_clear_flush(ptep, vaddr) \ 61#define kpte_clear_flush(ptep, vaddr) \
62do { \ 62do { \
63 pte_clear(&init_mm, (vaddr), (ptep)); \ 63 pte_clear(&init_mm, (vaddr), (ptep)); \
64 __flush_tlb_one((vaddr)); \ 64 __flush_tlb_one_kernel((vaddr)); \
65} while (0) 65} while (0)
66 66
67#endif /* !__ASSEMBLY__ */ 67#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 793bae7e7ce3..1bd9ed87606f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -91,7 +91,7 @@ struct cpuinfo_x86 {
91 __u8 x86; /* CPU family */ 91 __u8 x86; /* CPU family */
92 __u8 x86_vendor; /* CPU vendor */ 92 __u8 x86_vendor; /* CPU vendor */
93 __u8 x86_model; 93 __u8 x86_model;
94 __u8 x86_mask; 94 __u8 x86_stepping;
95#ifdef CONFIG_X86_64 95#ifdef CONFIG_X86_64
96 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ 96 /* Number of 4K pages in DTLB/ITLB combined(in pages): */
97 int x86_tlbsize; 97 int x86_tlbsize;
@@ -109,7 +109,7 @@ struct cpuinfo_x86 {
109 char x86_vendor_id[16]; 109 char x86_vendor_id[16];
110 char x86_model_id[64]; 110 char x86_model_id[64];
111 /* in KB - valid for CPUS which support this call: */ 111 /* in KB - valid for CPUS which support this call: */
112 int x86_cache_size; 112 unsigned int x86_cache_size;
113 int x86_cache_alignment; /* In bytes */ 113 int x86_cache_alignment; /* In bytes */
114 /* Cache QoS architectural values: */ 114 /* Cache QoS architectural values: */
115 int x86_cache_max_rmid; /* max index */ 115 int x86_cache_max_rmid; /* max index */
@@ -977,7 +977,4 @@ bool xen_set_default_idle(void);
977 977
978void stop_this_cpu(void *dummy); 978void stop_this_cpu(void *dummy);
979void df_debug(struct pt_regs *regs, long error_code); 979void df_debug(struct pt_regs *regs, long error_code);
980
981void __ibp_barrier(void);
982
983#endif /* _ASM_X86_PROCESSOR_H */ 980#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 461f53d27708..a4189762b266 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -129,6 +129,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
129void cpu_disable_common(void); 129void cpu_disable_common(void);
130void native_smp_prepare_boot_cpu(void); 130void native_smp_prepare_boot_cpu(void);
131void native_smp_prepare_cpus(unsigned int max_cpus); 131void native_smp_prepare_cpus(unsigned int max_cpus);
132void calculate_max_logical_packages(void);
132void native_smp_cpus_done(unsigned int max_cpus); 133void native_smp_cpus_done(unsigned int max_cpus);
133void common_cpu_up(unsigned int cpunum, struct task_struct *tidle); 134void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
134int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); 135int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 2b8f18ca5874..84137c22fdfa 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -140,7 +140,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
140#else 140#else
141#define __flush_tlb() __native_flush_tlb() 141#define __flush_tlb() __native_flush_tlb()
142#define __flush_tlb_global() __native_flush_tlb_global() 142#define __flush_tlb_global() __native_flush_tlb_global()
143#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) 143#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
144#endif 144#endif
145 145
146static inline bool tlb_defer_switch_to_init_mm(void) 146static inline bool tlb_defer_switch_to_init_mm(void)
@@ -400,7 +400,7 @@ static inline void __native_flush_tlb_global(void)
400/* 400/*
401 * flush one page in the user mapping 401 * flush one page in the user mapping
402 */ 402 */
403static inline void __native_flush_tlb_single(unsigned long addr) 403static inline void __native_flush_tlb_one_user(unsigned long addr)
404{ 404{
405 u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); 405 u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
406 406
@@ -437,18 +437,31 @@ static inline void __flush_tlb_all(void)
437/* 437/*
438 * flush one page in the kernel mapping 438 * flush one page in the kernel mapping
439 */ 439 */
440static inline void __flush_tlb_one(unsigned long addr) 440static inline void __flush_tlb_one_kernel(unsigned long addr)
441{ 441{
442 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); 442 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
443 __flush_tlb_single(addr); 443
444 /*
445 * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its
446 * paravirt equivalent. Even with PCID, this is sufficient: we only
447 * use PCID if we also use global PTEs for the kernel mapping, and
448 * INVLPG flushes global translations across all address spaces.
449 *
450 * If PTI is on, then the kernel is mapped with non-global PTEs, and
451 * __flush_tlb_one_user() will flush the given address for the current
452 * kernel address space and for its usermode counterpart, but it does
453 * not flush it for other address spaces.
454 */
455 __flush_tlb_one_user(addr);
444 456
445 if (!static_cpu_has(X86_FEATURE_PTI)) 457 if (!static_cpu_has(X86_FEATURE_PTI))
446 return; 458 return;
447 459
448 /* 460 /*
449 * __flush_tlb_single() will have cleared the TLB entry for this ASID, 461 * See above. We need to propagate the flush to all other address
450 * but since kernel space is replicated across all, we must also 462 * spaces. In principle, we only need to propagate it to kernelmode
451 * invalidate all others. 463 * address spaces, but the extra bookkeeping we would need is not
464 * worth it.
452 */ 465 */
453 invalidate_other_asid(); 466 invalidate_other_asid();
454} 467}
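
The rename from __flush_tlb_single/__flush_tlb_one to __flush_tlb_one_user/__flush_tlb_one_kernel encodes which mapping a caller means, and the new comments pin down the PTI interaction: a user flush covers the current ASID (and its PTI twin), while a kernel flush must also reach every other ASID because kernel mappings are replicated across address spaces. The shape of the kernel-side helper, condensed with stand-in stubs:

        static int pti_enabled;                          /* stand-in flag */
        static void flush_one_user_sketch(unsigned long addr) { (void)addr; }
        static void invalidate_other_asids_sketch(void)       { }

        static void flush_one_kernel_sketch(unsigned long addr)
        {
                flush_one_user_sketch(addr);     /* current address space */
                if (pti_enabled)                 /* replicated kernel PTEs */
                        invalidate_other_asids_sketch();
        }
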
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 6db28f17ff28..c88e0b127810 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -235,7 +235,7 @@ int amd_cache_northbridges(void)
235 if (boot_cpu_data.x86 == 0x10 && 235 if (boot_cpu_data.x86 == 0x10 &&
236 boot_cpu_data.x86_model >= 0x8 && 236 boot_cpu_data.x86_model >= 0x8 &&
237 (boot_cpu_data.x86_model > 0x9 || 237 (boot_cpu_data.x86_model > 0x9 ||
238 boot_cpu_data.x86_mask >= 0x1)) 238 boot_cpu_data.x86_stepping >= 0x1))
239 amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; 239 amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
240 240
241 if (boot_cpu_data.x86 == 0x15) 241 if (boot_cpu_data.x86 == 0x15)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 25ddf02598d2..b203af0855b5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -546,7 +546,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
546 546
547static u32 hsx_deadline_rev(void) 547static u32 hsx_deadline_rev(void)
548{ 548{
549 switch (boot_cpu_data.x86_mask) { 549 switch (boot_cpu_data.x86_stepping) {
550 case 0x02: return 0x3a; /* EP */ 550 case 0x02: return 0x3a; /* EP */
551 case 0x04: return 0x0f; /* EX */ 551 case 0x04: return 0x0f; /* EX */
552 } 552 }
@@ -556,7 +556,7 @@ static u32 hsx_deadline_rev(void)
556 556
557static u32 bdx_deadline_rev(void) 557static u32 bdx_deadline_rev(void)
558{ 558{
559 switch (boot_cpu_data.x86_mask) { 559 switch (boot_cpu_data.x86_stepping) {
560 case 0x02: return 0x00000011; 560 case 0x02: return 0x00000011;
561 case 0x03: return 0x0700000e; 561 case 0x03: return 0x0700000e;
562 case 0x04: return 0x0f00000c; 562 case 0x04: return 0x0f00000c;
@@ -568,7 +568,7 @@ static u32 bdx_deadline_rev(void)
568 568
569static u32 skx_deadline_rev(void) 569static u32 skx_deadline_rev(void)
570{ 570{
571 switch (boot_cpu_data.x86_mask) { 571 switch (boot_cpu_data.x86_stepping) {
572 case 0x03: return 0x01000136; 572 case 0x03: return 0x01000136;
573 case 0x04: return 0x02000014; 573 case 0x04: return 0x02000014;
574 } 574 }
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 46b675aaf20b..f11910b44638 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1176,16 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
1176 1176
1177 uv_gre_table = gre; 1177 uv_gre_table = gre;
1178 for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { 1178 for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
1179 unsigned long size = ((unsigned long)(gre->limit - lgre)
1180 << UV_GAM_RANGE_SHFT);
1181 int order = 0;
1182 char suffix[] = " KMGTPE";
1183
1184 while (size > 9999 && order < sizeof(suffix)) {
1185 size /= 1024;
1186 order++;
1187 }
1188
1179 if (!index) { 1189 if (!index) {
1180 pr_info("UV: GAM Range Table...\n"); 1190 pr_info("UV: GAM Range Table...\n");
1181 pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN"); 1191 pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
1182 } 1192 }
1183 pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n", 1193 pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n",
1184 index++, 1194 index++,
1185 (unsigned long)lgre << UV_GAM_RANGE_SHFT, 1195 (unsigned long)lgre << UV_GAM_RANGE_SHFT,
1186 (unsigned long)gre->limit << UV_GAM_RANGE_SHFT, 1196 (unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
1187 ((unsigned long)(gre->limit - lgre)) >> 1197 size, suffix[order],
1188 (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
1189 gre->type, gre->nasid, gre->sockid, gre->pnode); 1198 gre->type, gre->nasid, gre->sockid, gre->pnode);
1190 1199
1191 lgre = gre->limit; 1200 lgre = gre->limit;
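
The x2apic_uv_x.c hunk replaces the fixed ">> (30 - UV_GAM_RANGE_SHFT)" gigabyte scaling with a divide-by-1024 loop and a unit suffix, so sub-gigabyte ranges no longer print as 0G. A stand-alone version (loop bound tightened here to stay inside the suffix string):

        #include <stdio.h>

        static void print_range_size(unsigned long size)
        {
                const char suffix[] = " KMGTPE";
                int order = 0;

                while (size > 9999 && order < (int)sizeof(suffix) - 2) {
                        size /= 1024;
                        order++;
                }
                printf("%5lu%c\n", size, suffix[order]);
        }

        int main(void)
        {
                print_range_size(64UL << 20);   /* prints "   64M" */
                return 0;
        }
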
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index fa1261eefa16..f91ba53e06c8 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -18,7 +18,7 @@ void foo(void)
18 OFFSET(CPUINFO_x86, cpuinfo_x86, x86); 18 OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
19 OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); 19 OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
20 OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); 20 OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
21 OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); 21 OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
22 OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); 22 OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
23 OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); 23 OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
24 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); 24 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 5bddbdcbc4a3..f0e6456ca7d3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -119,7 +119,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
119 return; 119 return;
120 } 120 }
121 121
122 if (c->x86_model == 6 && c->x86_mask == 1) { 122 if (c->x86_model == 6 && c->x86_stepping == 1) {
123 const int K6_BUG_LOOP = 1000000; 123 const int K6_BUG_LOOP = 1000000;
124 int n; 124 int n;
125 void (*f_vide)(void); 125 void (*f_vide)(void);
@@ -149,7 +149,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
149 149
150 /* K6 with old style WHCR */ 150 /* K6 with old style WHCR */
151 if (c->x86_model < 8 || 151 if (c->x86_model < 8 ||
152 (c->x86_model == 8 && c->x86_mask < 8)) { 152 (c->x86_model == 8 && c->x86_stepping < 8)) {
153 /* We can only write allocate on the low 508Mb */ 153 /* We can only write allocate on the low 508Mb */
154 if (mbytes > 508) 154 if (mbytes > 508)
155 mbytes = 508; 155 mbytes = 508;
@@ -168,7 +168,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
168 return; 168 return;
169 } 169 }
170 170
171 if ((c->x86_model == 8 && c->x86_mask > 7) || 171 if ((c->x86_model == 8 && c->x86_stepping > 7) ||
172 c->x86_model == 9 || c->x86_model == 13) { 172 c->x86_model == 9 || c->x86_model == 13) {
173 /* The more serious chips .. */ 173 /* The more serious chips .. */
174 174
@@ -221,7 +221,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
221 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx 221 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
222 * As per AMD technical note 27212 0.2 222 * As per AMD technical note 27212 0.2
223 */ 223 */
224 if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { 224 if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) {
225 rdmsr(MSR_K7_CLK_CTL, l, h); 225 rdmsr(MSR_K7_CLK_CTL, l, h);
226 if ((l & 0xfff00000) != 0x20000000) { 226 if ((l & 0xfff00000) != 0x20000000) {
227 pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", 227 pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
@@ -241,12 +241,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
241 * but they are not certified as MP capable. 241 * but they are not certified as MP capable.
242 */ 242 */
243 /* Athlon 660/661 is valid. */ 243 /* Athlon 660/661 is valid. */
244 if ((c->x86_model == 6) && ((c->x86_mask == 0) || 244 if ((c->x86_model == 6) && ((c->x86_stepping == 0) ||
245 (c->x86_mask == 1))) 245 (c->x86_stepping == 1)))
246 return; 246 return;
247 247
248 /* Duron 670 is valid */ 248 /* Duron 670 is valid */
249 if ((c->x86_model == 7) && (c->x86_mask == 0)) 249 if ((c->x86_model == 7) && (c->x86_stepping == 0))
250 return; 250 return;
251 251
252 /* 252 /*
@@ -256,8 +256,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
256 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for 256 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
257 * more. 257 * more.
258 */ 258 */
259 if (((c->x86_model == 6) && (c->x86_mask >= 2)) || 259 if (((c->x86_model == 6) && (c->x86_stepping >= 2)) ||
260 ((c->x86_model == 7) && (c->x86_mask >= 1)) || 260 ((c->x86_model == 7) && (c->x86_stepping >= 1)) ||
261 (c->x86_model > 7)) 261 (c->x86_model > 7))
262 if (cpu_has(c, X86_FEATURE_MP)) 262 if (cpu_has(c, X86_FEATURE_MP))
263 return; 263 return;
@@ -628,7 +628,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
628 /* Set MTRR capability flag if appropriate */ 628 /* Set MTRR capability flag if appropriate */
629 if (c->x86 == 5) 629 if (c->x86 == 5)
630 if (c->x86_model == 13 || c->x86_model == 9 || 630 if (c->x86_model == 13 || c->x86_model == 9 ||
631 (c->x86_model == 8 && c->x86_mask >= 8)) 631 (c->x86_model == 8 && c->x86_stepping >= 8))
632 set_cpu_cap(c, X86_FEATURE_K6_MTRR); 632 set_cpu_cap(c, X86_FEATURE_K6_MTRR);
633#endif 633#endif
634#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) 634#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
@@ -795,7 +795,7 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
795 * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects 795 * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
796 * all up to and including B1. 796 * all up to and including B1.
797 */ 797 */
798 if (c->x86_model <= 1 && c->x86_mask <= 1) 798 if (c->x86_model <= 1 && c->x86_stepping <= 1)
799 set_cpu_cap(c, X86_FEATURE_CPB); 799 set_cpu_cap(c, X86_FEATURE_CPB);
800} 800}
801 801
@@ -906,11 +906,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
906 /* AMD errata T13 (order #21922) */ 906 /* AMD errata T13 (order #21922) */
907 if ((c->x86 == 6)) { 907 if ((c->x86 == 6)) {
908 /* Duron Rev A0 */ 908 /* Duron Rev A0 */
909 if (c->x86_model == 3 && c->x86_mask == 0) 909 if (c->x86_model == 3 && c->x86_stepping == 0)
910 size = 64; 910 size = 64;
911 /* Tbird rev A1/A2 */ 911 /* Tbird rev A1/A2 */
912 if (c->x86_model == 4 && 912 if (c->x86_model == 4 &&
913 (c->x86_mask == 0 || c->x86_mask == 1)) 913 (c->x86_stepping == 0 || c->x86_stepping == 1))
914 size = 256; 914 size = 256;
915 } 915 }
916 return size; 916 return size;
@@ -1047,7 +1047,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
1047 } 1047 }
1048 1048
1049 /* OSVW unavailable or ID unknown, match family-model-stepping range */ 1049 /* OSVW unavailable or ID unknown, match family-model-stepping range */
1050 ms = (cpu->x86_model << 4) | cpu->x86_mask; 1050 ms = (cpu->x86_model << 4) | cpu->x86_stepping;
1051 while ((range = *erratum++)) 1051 while ((range = *erratum++))
1052 if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && 1052 if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
1053 (ms >= AMD_MODEL_RANGE_START(range)) && 1053 (ms >= AMD_MODEL_RANGE_START(range)) &&
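
The range match at the end of the hunk above packs (model << 4) | stepping into a 12-bit value so each erratum can be described by a single integer range per family. A minimal userspace sketch of that encoding; the field layout is assumed from context, not copied from the kernel's AMD_MODEL_RANGE_* macros:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed field layout: family in bits 24-31, range start in bits
     * 12-23, range end in bits 0-11; each bound is (model << 4) | stepping. */
    #define RANGE_FAMILY(r) (((r) >> 24) & 0xff)
    #define RANGE_START(r)  (((r) >> 12) & 0xfff)
    #define RANGE_END(r)    ((r) & 0xfff)

    static int in_erratum_range(unsigned family, unsigned model,
                                unsigned stepping, uint32_t range)
    {
            unsigned ms = (model << 4) | stepping;

            return family == RANGE_FAMILY(range) &&
                   ms >= RANGE_START(range) && ms <= RANGE_END(range);
    }

    int main(void)
    {
            /* family 0x10, models 0x02 through 0x0f, any stepping */
            uint32_t range = (0x10u << 24) | (0x020u << 12) | 0x0ffu;

            printf("%d\n", in_erratum_range(0x10, 0x03, 0x1, range)); /* 1 */
            printf("%d\n", in_erratum_range(0x10, 0x10, 0x0, range)); /* 0 */
            return 0;
    }
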
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 71949bf2de5a..d71c8b54b696 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -162,8 +162,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
162 if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) 162 if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
163 return SPECTRE_V2_CMD_NONE; 163 return SPECTRE_V2_CMD_NONE;
164 else { 164 else {
165 ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, 165 ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
166 sizeof(arg));
167 if (ret < 0) 166 if (ret < 0)
168 return SPECTRE_V2_CMD_AUTO; 167 return SPECTRE_V2_CMD_AUTO;
169 168
@@ -175,8 +174,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
175 } 174 }
176 175
177 if (i >= ARRAY_SIZE(mitigation_options)) { 176 if (i >= ARRAY_SIZE(mitigation_options)) {
178 pr_err("unknown option (%s). Switching to AUTO select\n", 177 pr_err("unknown option (%s). Switching to AUTO select\n", arg);
179 mitigation_options[i].option);
180 return SPECTRE_V2_CMD_AUTO; 178 return SPECTRE_V2_CMD_AUTO;
181 } 179 }
182 } 180 }
@@ -185,8 +183,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
185 cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || 183 cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
186 cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && 184 cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
187 !IS_ENABLED(CONFIG_RETPOLINE)) { 185 !IS_ENABLED(CONFIG_RETPOLINE)) {
188 pr_err("%s selected but not compiled in. Switching to AUTO select\n", 186 pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
189 mitigation_options[i].option);
190 return SPECTRE_V2_CMD_AUTO; 187 return SPECTRE_V2_CMD_AUTO;
191 } 188 }
192 189
@@ -256,14 +253,14 @@ static void __init spectre_v2_select_mitigation(void)
256 goto retpoline_auto; 253 goto retpoline_auto;
257 break; 254 break;
258 } 255 }
259 pr_err("kernel not compiled with retpoline; no mitigation available!"); 256 pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
260 return; 257 return;
261 258
262retpoline_auto: 259retpoline_auto:
263 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { 260 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
264 retpoline_amd: 261 retpoline_amd:
265 if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { 262 if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
266 pr_err("LFENCE not serializing. Switching to generic retpoline\n"); 263 pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
267 goto retpoline_generic; 264 goto retpoline_generic;
268 } 265 }
269 mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : 266 mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
@@ -281,7 +278,7 @@ retpoline_auto:
281 pr_info("%s\n", spectre_v2_strings[mode]); 278 pr_info("%s\n", spectre_v2_strings[mode]);
282 279
283 /* 280 /*
284 * If neither SMEP or KPTI are available, there is a risk of 281 * If neither SMEP nor PTI are available, there is a risk of
285 * hitting userspace addresses in the RSB after a context switch 282 * hitting userspace addresses in the RSB after a context switch
286 * from a shallow call stack to a deeper one. To prevent this fill 283 * from a shallow call stack to a deeper one. To prevent this fill
287 * the entire RSB, even when using IBRS. 284 * the entire RSB, even when using IBRS.
@@ -295,21 +292,20 @@ retpoline_auto:
295 if ((!boot_cpu_has(X86_FEATURE_PTI) && 292 if ((!boot_cpu_has(X86_FEATURE_PTI) &&
296 !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { 293 !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
297 setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); 294 setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
298 pr_info("Filling RSB on context switch\n"); 295 pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
299 } 296 }
300 297
301 /* Initialize Indirect Branch Prediction Barrier if supported */ 298 /* Initialize Indirect Branch Prediction Barrier if supported */
302 if (boot_cpu_has(X86_FEATURE_IBPB)) { 299 if (boot_cpu_has(X86_FEATURE_IBPB)) {
303 setup_force_cpu_cap(X86_FEATURE_USE_IBPB); 300 setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
304 pr_info("Enabling Indirect Branch Prediction Barrier\n"); 301 pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
305 } 302 }
306} 303}
307 304
308#undef pr_fmt 305#undef pr_fmt
309 306
310#ifdef CONFIG_SYSFS 307#ifdef CONFIG_SYSFS
311ssize_t cpu_show_meltdown(struct device *dev, 308ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
312 struct device_attribute *attr, char *buf)
313{ 309{
314 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) 310 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
315 return sprintf(buf, "Not affected\n"); 311 return sprintf(buf, "Not affected\n");
@@ -318,16 +314,14 @@ ssize_t cpu_show_meltdown(struct device *dev,
318 return sprintf(buf, "Vulnerable\n"); 314 return sprintf(buf, "Vulnerable\n");
319} 315}
320 316
321ssize_t cpu_show_spectre_v1(struct device *dev, 317ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
322 struct device_attribute *attr, char *buf)
323{ 318{
324 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) 319 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
325 return sprintf(buf, "Not affected\n"); 320 return sprintf(buf, "Not affected\n");
326 return sprintf(buf, "Mitigation: __user pointer sanitization\n"); 321 return sprintf(buf, "Mitigation: __user pointer sanitization\n");
327} 322}
328 323
329ssize_t cpu_show_spectre_v2(struct device *dev, 324ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
330 struct device_attribute *attr, char *buf)
331{ 325{
332 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) 326 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
333 return sprintf(buf, "Not affected\n"); 327 return sprintf(buf, "Not affected\n");
@@ -337,9 +331,3 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
337 spectre_v2_module_string()); 331 spectre_v2_module_string());
338} 332}
339#endif 333#endif
340
341void __ibp_barrier(void)
342{
343 __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
344}
345EXPORT_SYMBOL_GPL(__ibp_barrier);
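
Two of the bugs.c hunks above fix a subtle out-of-bounds read: once i == ARRAY_SIZE(mitigation_options), printing mitigation_options[i].option indexes past the table, so the message must print the raw arg instead. The parse-table-with-safe-fallback shape, as a standalone sketch (the table contents here are illustrative, not the kernel's):

    #include <stdio.h>
    #include <string.h>

    enum { CMD_AUTO, CMD_NONE, CMD_RETPOLINE };

    static const struct {
            const char *option;
            int cmd;
    } options[] = {
            { "off",       CMD_NONE },
            { "auto",      CMD_AUTO },
            { "retpoline", CMD_RETPOLINE },
    };

    /* Unknown values fall back to AUTO; note that only arg is safe to
     * print here, since no table entry matched. */
    static int parse_cmd(const char *arg)
    {
            size_t i;

            for (i = 0; i < sizeof(options) / sizeof(options[0]); i++)
                    if (!strcmp(arg, options[i].option))
                            return options[i].cmd;

            fprintf(stderr, "unknown option (%s). Switching to AUTO select\n", arg);
            return CMD_AUTO;
    }

    int main(void)
    {
            printf("%d\n", parse_cmd("retpoline")); /* 2 */
            printf("%d\n", parse_cmd("bogus"));     /* 0, after the warning */
            return 0;
    }
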
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index c578cd29c2d2..e5ec0f11c0de 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -140,7 +140,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
140 clear_cpu_cap(c, X86_FEATURE_TSC); 140 clear_cpu_cap(c, X86_FEATURE_TSC);
141 break; 141 break;
142 case 8: 142 case 8:
143 switch (c->x86_mask) { 143 switch (c->x86_stepping) {
144 default: 144 default:
145 name = "2"; 145 name = "2";
146 break; 146 break;
@@ -215,7 +215,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
215 * - Note, it seems this may only be in engineering samples. 215 * - Note, it seems this may only be in engineering samples.
216 */ 216 */
217 if ((c->x86 == 6) && (c->x86_model == 9) && 217 if ((c->x86 == 6) && (c->x86_model == 9) &&
218 (c->x86_mask == 1) && (size == 65)) 218 (c->x86_stepping == 1) && (size == 65))
219 size -= 1; 219 size -= 1;
220 return size; 220 return size;
221} 221}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d63f4b5706e4..824aee0117bb 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -731,7 +731,7 @@ void cpu_detect(struct cpuinfo_x86 *c)
731 cpuid(0x00000001, &tfms, &misc, &junk, &cap0); 731 cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
732 c->x86 = x86_family(tfms); 732 c->x86 = x86_family(tfms);
733 c->x86_model = x86_model(tfms); 733 c->x86_model = x86_model(tfms);
734 c->x86_mask = x86_stepping(tfms); 734 c->x86_stepping = x86_stepping(tfms);
735 735
736 if (cap0 & (1<<19)) { 736 if (cap0 & (1<<19)) {
737 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 737 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
@@ -1184,9 +1184,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
1184 int i; 1184 int i;
1185 1185
1186 c->loops_per_jiffy = loops_per_jiffy; 1186 c->loops_per_jiffy = loops_per_jiffy;
1187 c->x86_cache_size = -1; 1187 c->x86_cache_size = 0;
1188 c->x86_vendor = X86_VENDOR_UNKNOWN; 1188 c->x86_vendor = X86_VENDOR_UNKNOWN;
1189 c->x86_model = c->x86_mask = 0; /* So far unknown... */ 1189 c->x86_model = c->x86_stepping = 0; /* So far unknown... */
1190 c->x86_vendor_id[0] = '\0'; /* Unset */ 1190 c->x86_vendor_id[0] = '\0'; /* Unset */
1191 c->x86_model_id[0] = '\0'; /* Unset */ 1191 c->x86_model_id[0] = '\0'; /* Unset */
1192 c->x86_max_cores = 1; 1192 c->x86_max_cores = 1;
@@ -1378,8 +1378,8 @@ void print_cpu_info(struct cpuinfo_x86 *c)
1378 1378
1379 pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model); 1379 pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
1380 1380
1381 if (c->x86_mask || c->cpuid_level >= 0) 1381 if (c->x86_stepping || c->cpuid_level >= 0)
1382 pr_cont(", stepping: 0x%x)\n", c->x86_mask); 1382 pr_cont(", stepping: 0x%x)\n", c->x86_stepping);
1383 else 1383 else
1384 pr_cont(")\n"); 1384 pr_cont(")\n");
1385} 1385}
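
Throughout this commit, x86_stepping is the low nibble of the CPUID leaf 1 signature that cpu_detect() splits apart above. A hedged userspace decoder for that signature layout; treat it as an approximation of the kernel's x86_family()/x86_model()/x86_stepping() helpers rather than a verbatim copy:

    #include <stdint.h>
    #include <stdio.h>

    static unsigned int sig_family(uint32_t sig)
    {
            unsigned int fam = (sig >> 8) & 0xf;

            if (fam == 0xf)
                    fam += (sig >> 20) & 0xff;  /* extended family */
            return fam;
    }

    static unsigned int sig_model(uint32_t sig)
    {
            unsigned int model = (sig >> 4) & 0xf;

            if (sig_family(sig) >= 0x6)
                    model += ((sig >> 16) & 0xf) << 4;  /* extended model */
            return model;
    }

    static unsigned int sig_stepping(uint32_t sig)
    {
            return sig & 0xf;
    }

    int main(void)
    {
            uint32_t sig = 0x000906e9; /* e.g. a Kaby Lake signature */

            printf("family 0x%x model 0x%x stepping 0x%x\n",
                   sig_family(sig), sig_model(sig), sig_stepping(sig));
            return 0;
    }
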
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 6b4bb335641f..8949b7ae6d92 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -215,7 +215,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
215 215
216 /* common case step number/rev -- exceptions handled below */ 216 /* common case step number/rev -- exceptions handled below */
217 c->x86_model = (dir1 >> 4) + 1; 217 c->x86_model = (dir1 >> 4) + 1;
218 c->x86_mask = dir1 & 0xf; 218 c->x86_stepping = dir1 & 0xf;
219 219
220 /* Now cook; the original recipe is by Channing Corn, from Cyrix. 220 /* Now cook; the original recipe is by Channing Corn, from Cyrix.
221 * We do the same thing for each generation: we work out 221 * We do the same thing for each generation: we work out
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 319bf989fad1..d19e903214b4 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -116,14 +116,13 @@ struct sku_microcode {
116 u32 microcode; 116 u32 microcode;
117}; 117};
118static const struct sku_microcode spectre_bad_microcodes[] = { 118static const struct sku_microcode spectre_bad_microcodes[] = {
119 { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, 119 { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 },
120 { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, 120 { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 },
121 { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, 121 { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 },
122 { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, 122 { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 },
123 { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, 123 { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 },
124 { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, 124 { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
125 { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, 125 { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
126 { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 },
127 { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, 126 { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 },
128 { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, 127 { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
129 { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, 128 { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
@@ -136,8 +135,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = {
136 { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, 135 { INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
137 { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, 136 { INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
138 { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, 137 { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
139 /* Updated in the 20180108 release; blacklist until we know otherwise */
140 { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 },
141 /* Observed in the wild */ 138 /* Observed in the wild */
142 { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, 139 { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
143 { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, 140 { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
@@ -149,7 +146,7 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
149 146
150 for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { 147 for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
151 if (c->x86_model == spectre_bad_microcodes[i].model && 148 if (c->x86_model == spectre_bad_microcodes[i].model &&
152 c->x86_mask == spectre_bad_microcodes[i].stepping) 149 c->x86_stepping == spectre_bad_microcodes[i].stepping)
153 return (c->microcode <= spectre_bad_microcodes[i].microcode); 150 return (c->microcode <= spectre_bad_microcodes[i].microcode);
154 } 151 }
155 return false; 152 return false;
@@ -196,7 +193,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
196 * need the microcode to have already been loaded... so if it is 193 * need the microcode to have already been loaded... so if it is
197 * not, recommend a BIOS update and disable large pages. 194 * not, recommend a BIOS update and disable large pages.
198 */ 195 */
199 if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && 196 if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
200 c->microcode < 0x20e) { 197 c->microcode < 0x20e) {
201 pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); 198 pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
202 clear_cpu_cap(c, X86_FEATURE_PSE); 199 clear_cpu_cap(c, X86_FEATURE_PSE);
@@ -212,7 +209,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
212 209
213 /* CPUID workaround for 0F33/0F34 CPU */ 210 /* CPUID workaround for 0F33/0F34 CPU */
214 if (c->x86 == 0xF && c->x86_model == 0x3 211 if (c->x86 == 0xF && c->x86_model == 0x3
215 && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) 212 && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
216 c->x86_phys_bits = 36; 213 c->x86_phys_bits = 36;
217 214
218 /* 215 /*
@@ -310,7 +307,7 @@ int ppro_with_ram_bug(void)
310 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 307 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
311 boot_cpu_data.x86 == 6 && 308 boot_cpu_data.x86 == 6 &&
312 boot_cpu_data.x86_model == 1 && 309 boot_cpu_data.x86_model == 1 &&
313 boot_cpu_data.x86_mask < 8) { 310 boot_cpu_data.x86_stepping < 8) {
314 pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n"); 311 pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
315 return 1; 312 return 1;
316 } 313 }
@@ -327,7 +324,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c)
327 * Mask B, Pentium, but not Pentium MMX 324 * Mask B, Pentium, but not Pentium MMX
328 */ 325 */
329 if (c->x86 == 5 && 326 if (c->x86 == 5 &&
330 c->x86_mask >= 1 && c->x86_mask <= 4 && 327 c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
331 c->x86_model <= 3) { 328 c->x86_model <= 3) {
332 /* 329 /*
333 * Remember we have B step Pentia with bugs 330 * Remember we have B step Pentia with bugs
@@ -370,7 +367,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
370 * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until 367 * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
371 * model 3 mask 3 368 * model 3 mask 3
372 */ 369 */
373 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) 370 if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
374 clear_cpu_cap(c, X86_FEATURE_SEP); 371 clear_cpu_cap(c, X86_FEATURE_SEP);
375 372
376 /* 373 /*
@@ -388,7 +385,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
388 * P4 Xeon erratum 037 workaround. 385 * P4 Xeon erratum 037 workaround.
389 * Hardware prefetcher may cause stale data to be loaded into the cache. 386 * Hardware prefetcher may cause stale data to be loaded into the cache.
390 */ 387 */
391 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { 388 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
392 if (msr_set_bit(MSR_IA32_MISC_ENABLE, 389 if (msr_set_bit(MSR_IA32_MISC_ENABLE,
393 MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { 390 MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
394 pr_info("CPU: C0 stepping P4 Xeon detected.\n"); 391 pr_info("CPU: C0 stepping P4 Xeon detected.\n");
@@ -403,7 +400,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
403 * Specification Update"). 400 * Specification Update").
404 */ 401 */
405 if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && 402 if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
406 (c->x86_mask < 0x6 || c->x86_mask == 0xb)) 403 (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
407 set_cpu_bug(c, X86_BUG_11AP); 404 set_cpu_bug(c, X86_BUG_11AP);
408 405
409 406
@@ -650,7 +647,7 @@ static void init_intel(struct cpuinfo_x86 *c)
650 case 6: 647 case 6:
651 if (l2 == 128) 648 if (l2 == 128)
652 p = "Celeron (Mendocino)"; 649 p = "Celeron (Mendocino)";
653 else if (c->x86_mask == 0 || c->x86_mask == 5) 650 else if (c->x86_stepping == 0 || c->x86_stepping == 5)
654 p = "Celeron-A"; 651 p = "Celeron-A";
655 break; 652 break;
656 653
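
bad_spectre_microcode() above is a linear scan keyed on (model, stepping) where any revision at or below the listed one counts as bad, which is also why the Kaby Lake entries drop from 0x84 to 0x80 once revision 0x84 turned out to be fine. The same idea as a self-contained sketch (entries are illustrative):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct sku_microcode {
            uint8_t  model;
            uint8_t  stepping;
            uint32_t microcode; /* newest known-bad revision */
    };

    static const struct sku_microcode bad_ucode[] = {
            { 0x9e, 0x09, 0x80 },       /* illustrative entry */
            { 0x4f, 0x01, 0x0b000025 },
    };

    static bool microcode_is_bad(uint8_t model, uint8_t stepping, uint32_t rev)
    {
            size_t i;

            for (i = 0; i < sizeof(bad_ucode) / sizeof(bad_ucode[0]); i++)
                    if (model == bad_ucode[i].model &&
                        stepping == bad_ucode[i].stepping)
                            return rev <= bad_ucode[i].microcode;

            return false;
    }

    int main(void)
    {
            printf("%d\n", microcode_is_bad(0x9e, 0x09, 0x80)); /* 1: bad */
            printf("%d\n", microcode_is_bad(0x9e, 0x09, 0x84)); /* 0: fixed */
            return 0;
    }
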
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 410629f10ad3..589b948e6e01 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -819,7 +819,7 @@ static __init void rdt_quirks(void)
819 cache_alloc_hsw_probe(); 819 cache_alloc_hsw_probe();
820 break; 820 break;
821 case INTEL_FAM6_SKYLAKE_X: 821 case INTEL_FAM6_SKYLAKE_X:
822 if (boot_cpu_data.x86_mask <= 4) 822 if (boot_cpu_data.x86_stepping <= 4)
823 set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat"); 823 set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
824 } 824 }
825} 825}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index aa0d5df9dc60..e956eb267061 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
115 115
116extern struct mca_config mca_cfg; 116extern struct mca_config mca_cfg;
117 117
118#ifndef CONFIG_X86_64
119/*
120 * On 32-bit systems it would be difficult to safely unmap a poison page
121 * from the kernel 1:1 map because there are no non-canonical addresses that
122 * we can use to refer to the address without risking a speculative access.
123 * However, this isn't much of an issue because:
124 * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
125 * are only mapped into the kernel as needed
126 * 2) Few people would run a 32-bit kernel on a machine that supports
127 * recoverable errors because they have too much memory to boot 32-bit.
128 */
129static inline void mce_unmap_kpfn(unsigned long pfn) {}
130#define mce_unmap_kpfn mce_unmap_kpfn
131#endif
132
118#endif /* __X86_MCE_INTERNAL_H__ */ 133#endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3a8e88a611eb..8ff94d1e2dce 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -105,6 +105,10 @@ static struct irq_work mce_irq_work;
105 105
106static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); 106static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
107 107
108#ifndef mce_unmap_kpfn
109static void mce_unmap_kpfn(unsigned long pfn);
110#endif
111
108/* 112/*
109 * CPU/chipset specific EDAC code can register a notifier call here to print 113 * CPU/chipset specific EDAC code can register a notifier call here to print
110 * MCE errors in a human-readable form. 114 * MCE errors in a human-readable form.
@@ -234,7 +238,7 @@ static void __print_mce(struct mce *m)
234 m->cs, m->ip); 238 m->cs, m->ip);
235 239
236 if (m->cs == __KERNEL_CS) 240 if (m->cs == __KERNEL_CS)
237 pr_cont("{%pS}", (void *)m->ip); 241 pr_cont("{%pS}", (void *)(unsigned long)m->ip);
238 pr_cont("\n"); 242 pr_cont("\n");
239 } 243 }
240 244
@@ -590,7 +594,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
590 594
591 if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { 595 if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
592 pfn = mce->addr >> PAGE_SHIFT; 596 pfn = mce->addr >> PAGE_SHIFT;
593 memory_failure(pfn, 0); 597 if (!memory_failure(pfn, 0))
598 mce_unmap_kpfn(pfn);
594 } 599 }
595 600
596 return NOTIFY_OK; 601 return NOTIFY_OK;
@@ -1057,12 +1062,13 @@ static int do_memory_failure(struct mce *m)
1057 ret = memory_failure(m->addr >> PAGE_SHIFT, flags); 1062 ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
1058 if (ret) 1063 if (ret)
1059 pr_err("Memory error not recovered"); 1064 pr_err("Memory error not recovered");
1065 else
1066 mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
1060 return ret; 1067 return ret;
1061} 1068}
1062 1069
1063#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE) 1070#ifndef mce_unmap_kpfn
1064 1071static void mce_unmap_kpfn(unsigned long pfn)
1065void arch_unmap_kpfn(unsigned long pfn)
1066{ 1072{
1067 unsigned long decoy_addr; 1073 unsigned long decoy_addr;
1068 1074
@@ -1073,7 +1079,7 @@ void arch_unmap_kpfn(unsigned long pfn)
1073 * We would like to just call: 1079 * We would like to just call:
1074 * set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1); 1080 * set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
1075 * but doing that would radically increase the odds of a 1081 * but doing that would radically increase the odds of a
1076 * speculative access to the posion page because we'd have 1082 * speculative access to the poison page because we'd have
1077 * the virtual address of the kernel 1:1 mapping sitting 1083 * the virtual address of the kernel 1:1 mapping sitting
1078 * around in registers. 1084 * around in registers.
1079 * Instead we get tricky. We create a non-canonical address 1085 * Instead we get tricky. We create a non-canonical address
@@ -1098,7 +1104,6 @@ void arch_unmap_kpfn(unsigned long pfn)
1098 1104
1099 if (set_memory_np(decoy_addr, 1)) 1105 if (set_memory_np(decoy_addr, 1))
1100 pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); 1106 pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
1101
1102} 1107}
1103#endif 1108#endif
1104 1109
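
The comment block above describes the actual trick: XOR a high bit into the direct-map address so the value sitting in registers is non-canonical (and thus never speculatively dereferenced), while the page-table update still targets the right PTE once the bit is masked back off. The address arithmetic, sketched with assumed LP64 constants rather than the kernel's real memory map:

    #include <stdio.h>

    /* Illustrative LP64 constants; the kernel derives these from the
     * live memory map. */
    #define PAGE_SHIFT  12
    #define PAGE_OFFSET 0xffff888000000000UL
    #define BIT63       (1UL << 63)

    int main(void)
    {
            unsigned long pfn = 0x12345;
            unsigned long linear = (pfn << PAGE_SHIFT) + PAGE_OFFSET;

            /* With bit 63 flipped, bits 63..47 no longer sign-extend, so
             * the value is non-canonical and a speculative load through
             * it faults early instead of touching the poison page. */
            unsigned long decoy = linear ^ BIT63;

            printf("linear %#lx\n", linear);
            printf("decoy  %#lx\n", decoy);
            printf("same PTE target once unmasked: %d\n",
                   (decoy | BIT63) == linear); /* 1 */
            return 0;
    }
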
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index f7c55b0e753a..a15db2b4e0d6 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -921,7 +921,7 @@ static bool is_blacklisted(unsigned int cpu)
921 */ 921 */
922 if (c->x86 == 6 && 922 if (c->x86 == 6 &&
923 c->x86_model == INTEL_FAM6_BROADWELL_X && 923 c->x86_model == INTEL_FAM6_BROADWELL_X &&
924 c->x86_mask == 0x01 && 924 c->x86_stepping == 0x01 &&
925 llc_size_per_core > 2621440 && 925 llc_size_per_core > 2621440 &&
926 c->microcode < 0x0b000021) { 926 c->microcode < 0x0b000021) {
927 pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); 927 pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
@@ -944,7 +944,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
944 return UCODE_NFOUND; 944 return UCODE_NFOUND;
945 945
946 sprintf(name, "intel-ucode/%02x-%02x-%02x", 946 sprintf(name, "intel-ucode/%02x-%02x-%02x",
947 c->x86, c->x86_model, c->x86_mask); 947 c->x86, c->x86_model, c->x86_stepping);
948 948
949 if (request_firmware_direct(&firmware, name, device)) { 949 if (request_firmware_direct(&firmware, name, device)) {
950 pr_debug("data file %s load failed\n", name); 950 pr_debug("data file %s load failed\n", name);
@@ -982,7 +982,7 @@ static struct microcode_ops microcode_intel_ops = {
982 982
983static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c) 983static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
984{ 984{
985 u64 llc_size = c->x86_cache_size * 1024; 985 u64 llc_size = c->x86_cache_size * 1024ULL;
986 986
987 do_div(llc_size, c->x86_max_cores); 987 do_div(llc_size, c->x86_max_cores);
988 988
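
The 1024ULL hunk above matters because x86_cache_size is now a 32-bit unsigned quantity: multiplied by a plain int 1024, the arithmetic happens in 32 bits and can wrap before the result is stored into the u64. Demonstrated outside the kernel with a deliberately oversized value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* hypothetical huge LLC, expressed in KB */
            uint32_t cache_kb = 8u * 1024 * 1024;

            uint64_t wrong = cache_kb * 1024;    /* 32-bit multiply, wraps to 0 */
            uint64_t right = cache_kb * 1024ULL; /* widened first, correct */

            printf("wrong=%llu right=%llu\n",
                   (unsigned long long)wrong, (unsigned long long)right);
            return 0;
    }
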
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index fdc55215d44d..e12ee86906c6 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -859,7 +859,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
859 */ 859 */
860 if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && 860 if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
861 boot_cpu_data.x86_model == 1 && 861 boot_cpu_data.x86_model == 1 &&
862 boot_cpu_data.x86_mask <= 7) { 862 boot_cpu_data.x86_stepping <= 7) {
863 if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { 863 if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
864 pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); 864 pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
865 return -EINVAL; 865 return -EINVAL;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 40d5a8a75212..7468de429087 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -711,8 +711,8 @@ void __init mtrr_bp_init(void)
711 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 711 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
712 boot_cpu_data.x86 == 0xF && 712 boot_cpu_data.x86 == 0xF &&
713 boot_cpu_data.x86_model == 0x3 && 713 boot_cpu_data.x86_model == 0x3 &&
714 (boot_cpu_data.x86_mask == 0x3 || 714 (boot_cpu_data.x86_stepping == 0x3 ||
715 boot_cpu_data.x86_mask == 0x4)) 715 boot_cpu_data.x86_stepping == 0x4))
716 phys_addr = 36; 716 phys_addr = 36;
717 717
718 size_or_mask = SIZE_OR_MASK_BITS(phys_addr); 718 size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index e7ecedafa1c8..2c8522a39ed5 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -72,8 +72,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
72 c->x86_model, 72 c->x86_model,
73 c->x86_model_id[0] ? c->x86_model_id : "unknown"); 73 c->x86_model_id[0] ? c->x86_model_id : "unknown");
74 74
75 if (c->x86_mask || c->cpuid_level >= 0) 75 if (c->x86_stepping || c->cpuid_level >= 0)
76 seq_printf(m, "stepping\t: %d\n", c->x86_mask); 76 seq_printf(m, "stepping\t: %d\n", c->x86_stepping);
77 else 77 else
78 seq_puts(m, "stepping\t: unknown\n"); 78 seq_puts(m, "stepping\t: unknown\n");
79 if (c->microcode) 79 if (c->microcode)
@@ -91,8 +91,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
91 } 91 }
92 92
93 /* Cache size */ 93 /* Cache size */
94 if (c->x86_cache_size >= 0) 94 if (c->x86_cache_size)
95 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); 95 seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size);
96 96
97 show_cpuinfo_core(m, c, cpu); 97 show_cpuinfo_core(m, c, cpu);
98 show_cpuinfo_misc(m, c); 98 show_cpuinfo_misc(m, c);
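
The proc.c hunk drops the `>= 0` test because an unsigned x86_cache_size is always >= 0: the old -1 "unknown" sentinel stops being detectable, hence the switch to 0-means-unknown in identify_cpu() earlier in this diff. The hazard in miniature:

    #include <stdio.h>

    int main(void)
    {
            unsigned int cache_size = (unsigned int)-1; /* old "unknown" sentinel */

            if (cache_size >= 0)   /* always true for unsigned; compilers warn */
                    printf("would print garbage: %u KB\n", cache_size);

            cache_size = 0;        /* new sentinel: 0 means unknown */
            if (cache_size)
                    printf("cache size: %u KB\n", cache_size);
            else
                    printf("unknown cache size\n");
            return 0;
    }
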
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index c29020907886..b59e4fb40fd9 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -37,7 +37,7 @@
37#define X86 new_cpu_data+CPUINFO_x86 37#define X86 new_cpu_data+CPUINFO_x86
38#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor 38#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor
39#define X86_MODEL new_cpu_data+CPUINFO_x86_model 39#define X86_MODEL new_cpu_data+CPUINFO_x86_model
40#define X86_MASK new_cpu_data+CPUINFO_x86_mask 40#define X86_STEPPING new_cpu_data+CPUINFO_x86_stepping
41#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math 41#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math
42#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level 42#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level
43#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability 43#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
@@ -332,7 +332,7 @@ ENTRY(startup_32_smp)
332 shrb $4,%al 332 shrb $4,%al
333 movb %al,X86_MODEL 333 movb %al,X86_MODEL
334 andb $0x0f,%cl # mask mask revision 334 andb $0x0f,%cl # mask mask revision
335 movb %cl,X86_MASK 335 movb %cl,X86_STEPPING
336 movl %edx,X86_CAPABILITY 336 movl %edx,X86_CAPABILITY
337 337
338.Lis486: 338.Lis486:
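
For readers not fluent in the boot assembly above: the shrb/andb pair simply splits one byte of the CPUID signature, high nibble into the model, low nibble into the stepping that this commit renames. An equivalent in C (the byte value is illustrative):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint8_t sig_low = 0x6e;            /* assumed low signature byte */
            uint8_t model = sig_low >> 4;      /* shrb $4,%al */
            uint8_t stepping = sig_low & 0x0f; /* andb $0x0f,%cl */

            printf("model=%#x stepping=%#x\n", model, stepping);
            return 0;
    }
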
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 27d0a1712663..f1c5eb99d445 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -410,7 +410,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
410 processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01; 410 processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
411 processor.cpuflag = CPU_ENABLED; 411 processor.cpuflag = CPU_ENABLED;
412 processor.cpufeature = (boot_cpu_data.x86 << 8) | 412 processor.cpufeature = (boot_cpu_data.x86 << 8) |
413 (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; 413 (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping;
414 processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX]; 414 processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];
415 processor.reserved[0] = 0; 415 processor.reserved[0] = 0;
416 processor.reserved[1] = 0; 416 processor.reserved[1] = 0;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 041096bdef86..99dc79e76bdc 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -200,9 +200,9 @@ static void native_flush_tlb_global(void)
200 __native_flush_tlb_global(); 200 __native_flush_tlb_global();
201} 201}
202 202
203static void native_flush_tlb_single(unsigned long addr) 203static void native_flush_tlb_one_user(unsigned long addr)
204{ 204{
205 __native_flush_tlb_single(addr); 205 __native_flush_tlb_one_user(addr);
206} 206}
207 207
208struct static_key paravirt_steal_enabled; 208struct static_key paravirt_steal_enabled;
@@ -401,7 +401,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
401 401
402 .flush_tlb_user = native_flush_tlb, 402 .flush_tlb_user = native_flush_tlb,
403 .flush_tlb_kernel = native_flush_tlb_global, 403 .flush_tlb_kernel = native_flush_tlb_global,
404 .flush_tlb_single = native_flush_tlb_single, 404 .flush_tlb_one_user = native_flush_tlb_one_user,
405 .flush_tlb_others = native_flush_tlb_others, 405 .flush_tlb_others = native_flush_tlb_others,
406 406
407 .pgd_alloc = __paravirt_pgd_alloc, 407 .pgd_alloc = __paravirt_pgd_alloc,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6f27facbaa9b..9eee25d07586 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1281,11 +1281,10 @@ void __init native_smp_prepare_boot_cpu(void)
1281 cpu_set_state_online(me); 1281 cpu_set_state_online(me);
1282} 1282}
1283 1283
1284void __init native_smp_cpus_done(unsigned int max_cpus) 1284void __init calculate_max_logical_packages(void)
1285{ 1285{
1286 int ncpus; 1286 int ncpus;
1287 1287
1288 pr_debug("Boot done\n");
1289 /* 1288 /*
1290 * Today neither Intel nor AMD support heterogenous systems so 1289 * Today neither Intel nor AMD support heterogenous systems so
1291 * extrapolate the boot cpu's data to all packages. 1290 * extrapolate the boot cpu's data to all packages.
@@ -1293,6 +1292,13 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1293 ncpus = cpu_data(0).booted_cores * topology_max_smt_threads(); 1292 ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
1294 __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); 1293 __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
1295 pr_info("Max logical packages: %u\n", __max_logical_packages); 1294 pr_info("Max logical packages: %u\n", __max_logical_packages);
1295}
1296
1297void __init native_smp_cpus_done(unsigned int max_cpus)
1298{
1299 pr_debug("Boot done\n");
1300
1301 calculate_max_logical_packages();
1296 1302
1297 if (x86_has_numa_in_package) 1303 if (x86_has_numa_in_package)
1298 set_sched_topology(x86_numa_in_package_topology); 1304 set_sched_topology(x86_numa_in_package_topology);
@@ -1430,7 +1436,6 @@ static void remove_siblinginfo(int cpu)
1430 cpumask_clear(cpu_llc_shared_mask(cpu)); 1436 cpumask_clear(cpu_llc_shared_mask(cpu));
1431 cpumask_clear(topology_sibling_cpumask(cpu)); 1437 cpumask_clear(topology_sibling_cpumask(cpu));
1432 cpumask_clear(topology_core_cpumask(cpu)); 1438 cpumask_clear(topology_core_cpumask(cpu));
1433 c->phys_proc_id = 0;
1434 c->cpu_core_id = 0; 1439 c->cpu_core_id = 0;
1435 cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); 1440 cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
1436 recompute_smt_state(); 1441 recompute_smt_state();
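
The new calculate_max_logical_packages() helper just extrapolates the boot CPU's topology, as its comment says: threads per package = booted cores x SMT threads per core, and the package count is nr_cpu_ids divided by that, rounded up. The Xen hunk further down calls it directly on PV, where native_smp_cpus_done() is skipped. Worked through with assumed numbers:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            int booted_cores = 4;  /* cores seen on the boot package */
            int smt_threads  = 2;  /* threads per core */
            int nr_cpu_ids   = 12; /* possible CPUs in this config */

            int ncpus = booted_cores * smt_threads;             /* 8 */
            int max_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); /* 2 */

            printf("Max logical packages: %d\n", max_packages);
            return 0;
    }
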
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 446c9ef8cfc3..3d9b2308e7fa 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -181,7 +181,7 @@ int fixup_bug(struct pt_regs *regs, int trapnr)
181 break; 181 break;
182 182
183 case BUG_TRAP_TYPE_WARN: 183 case BUG_TRAP_TYPE_WARN:
184 regs->ip += LEN_UD0; 184 regs->ip += LEN_UD2;
185 return 1; 185 return 1;
186 } 186 }
187 187
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8eca1d04aeb8..46ff304140c7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5080,7 +5080,7 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)
5080typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); 5080typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
5081 5081
5082/* The caller should hold mmu-lock before calling this function. */ 5082/* The caller should hold mmu-lock before calling this function. */
5083static bool 5083static __always_inline bool
5084slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, 5084slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
5085 slot_level_handler fn, int start_level, int end_level, 5085 slot_level_handler fn, int start_level, int end_level,
5086 gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) 5086 gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
@@ -5110,7 +5110,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
5110 return flush; 5110 return flush;
5111} 5111}
5112 5112
5113static bool 5113static __always_inline bool
5114slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, 5114slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
5115 slot_level_handler fn, int start_level, int end_level, 5115 slot_level_handler fn, int start_level, int end_level,
5116 bool lock_flush_tlb) 5116 bool lock_flush_tlb)
@@ -5121,7 +5121,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
5121 lock_flush_tlb); 5121 lock_flush_tlb);
5122} 5122}
5123 5123
5124static bool 5124static __always_inline bool
5125slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, 5125slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
5126 slot_level_handler fn, bool lock_flush_tlb) 5126 slot_level_handler fn, bool lock_flush_tlb)
5127{ 5127{
@@ -5129,7 +5129,7 @@ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
5129 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); 5129 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
5130} 5130}
5131 5131
5132static bool 5132static __always_inline bool
5133slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, 5133slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
5134 slot_level_handler fn, bool lock_flush_tlb) 5134 slot_level_handler fn, bool lock_flush_tlb)
5135{ 5135{
@@ -5137,7 +5137,7 @@ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
5137 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); 5137 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
5138} 5138}
5139 5139
5140static bool 5140static __always_inline bool
5141slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, 5141slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
5142 slot_level_handler fn, bool lock_flush_tlb) 5142 slot_level_handler fn, bool lock_flush_tlb)
5143{ 5143{
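
The __always_inline changes are presumably a retpoline-era optimization: once each walker is inlined into a caller that passes a specific handler, the compiler can replace the fn() indirect call with a direct one. The devirtualization pattern, illustrated generically (GCC/Clang attribute assumed; this is not KVM code):

    #include <stdbool.h>
    #include <stdio.h>

    typedef bool (*slot_handler)(int slot);

    /* When this is always inlined into a caller that passes a literal
     * handler, the compiler sees the target and can emit a direct call
     * instead of a retpoline-expensive indirect one. */
    static inline __attribute__((always_inline)) bool
    walk_slots(int nslots, slot_handler fn)
    {
            bool flush = false;

            for (int s = 0; s < nslots; s++)
                    flush |= fn(s);
            return flush;
    }

    static bool clear_dirty(int slot)
    {
            printf("clearing slot %d\n", slot);
            return slot % 2 == 0;
    }

    int main(void)
    {
            printf("flush=%d\n", walk_slots(3, clear_dirty));
            return 0;
    }
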
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f427723dc7db..3dec126aa302 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10136,7 +10136,10 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
10136 (unsigned long)(vmcs12->posted_intr_desc_addr & 10136 (unsigned long)(vmcs12->posted_intr_desc_addr &
10137 (PAGE_SIZE - 1))); 10137 (PAGE_SIZE - 1)));
10138 } 10138 }
10139 if (!nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) 10139 if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
10140 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
10141 CPU_BASED_USE_MSR_BITMAPS);
10142 else
10140 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, 10143 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
10141 CPU_BASED_USE_MSR_BITMAPS); 10144 CPU_BASED_USE_MSR_BITMAPS);
10142} 10145}
@@ -10224,8 +10227,8 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
10224 * updated to reflect this when L1 (or its L2s) actually write to 10227 * updated to reflect this when L1 (or its L2s) actually write to
10225 * the MSR. 10228 * the MSR.
10226 */ 10229 */
10227 bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); 10230 bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
10228 bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); 10231 bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
10229 10232
10230 /* Nothing to do if the MSR bitmap is not in use. */ 10233 /* Nothing to do if the MSR bitmap is not in use. */
10231 if (!cpu_has_vmx_msr_bitmap() || 10234 if (!cpu_has_vmx_msr_bitmap() ||
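
The vmx.c polarity fix is easy to misread: msr_write_intercepted_l01() returns true when L0 traps the write, but the merge logic needs "L0 already passes this MSR through", hence the added negations. The companion hunk then sets CPU_BASED_USE_MSR_BITMAPS only when the merged bitmap was actually built. That set-or-clear shape, sketched (the bit value is an assumption standing in for the VMX definition):

    #include <stdbool.h>
    #include <stdio.h>

    #define USE_MSR_BITMAPS 0x10000000u /* assumed exec-control bit */

    static unsigned int exec_control;

    /* Advertise the merged MSR bitmap to the CPU only when it was
     * actually built; otherwise make sure the stale bit is cleared. */
    static void update_msr_bitmap_ctrl(bool bitmap_ready)
    {
            if (bitmap_ready)
                    exec_control |= USE_MSR_BITMAPS;
            else
                    exec_control &= ~USE_MSR_BITMAPS;
    }

    int main(void)
    {
            update_msr_bitmap_ctrl(true);
            printf("ctrl=%#x\n", exec_control);
            update_msr_bitmap_ctrl(false);
            printf("ctrl=%#x\n", exec_control);
            return 0;
    }
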
diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c
index d6f848d1211d..2dd1fe13a37b 100644
--- a/arch/x86/lib/cpu.c
+++ b/arch/x86/lib/cpu.c
@@ -18,7 +18,7 @@ unsigned int x86_model(unsigned int sig)
18{ 18{
19 unsigned int fam, model; 19 unsigned int fam, model;
20 20
21 fam = x86_family(sig); 21 fam = x86_family(sig);
22 22
23 model = (sig >> 4) & 0xf; 23 model = (sig >> 4) & 0xf;
24 24
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index 7b881d03d0dd..3cdf06128d13 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -7,6 +7,7 @@ asmlinkage void just_return_func(void);
7 7
8asm( 8asm(
9 ".type just_return_func, @function\n" 9 ".type just_return_func, @function\n"
10 ".globl just_return_func\n"
10 "just_return_func:\n" 11 "just_return_func:\n"
11 " ret\n" 12 " ret\n"
12 ".size just_return_func, .-just_return_func\n" 13 ".size just_return_func, .-just_return_func\n"
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ab42c852069..8b72923f1d35 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -256,7 +256,7 @@ static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte)
256 * It's enough to flush this one mapping. 256 * It's enough to flush this one mapping.
257 * (PGE mappings get flushed as well) 257 * (PGE mappings get flushed as well)
258 */ 258 */
259 __flush_tlb_one(vaddr); 259 __flush_tlb_one_kernel(vaddr);
260} 260}
261 261
262void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte) 262void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
@@ -1193,8 +1193,8 @@ void __init mem_init(void)
1193 register_page_bootmem_info(); 1193 register_page_bootmem_info();
1194 1194
1195 /* Register memory areas for /proc/kcore */ 1195 /* Register memory areas for /proc/kcore */
1196 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, 1196 if (get_gate_vma(&init_mm))
1197 PAGE_SIZE, KCORE_OTHER); 1197 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
1198 1198
1199 mem_init_print_info(NULL); 1199 mem_init_print_info(NULL);
1200} 1200}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c45b6ec5357b..e2db83bebc3b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -820,5 +820,5 @@ void __init __early_set_fixmap(enum fixed_addresses idx,
820 set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); 820 set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
821 else 821 else
822 pte_clear(&init_mm, addr, pte); 822 pte_clear(&init_mm, addr, pte);
823 __flush_tlb_one(addr); 823 __flush_tlb_one_kernel(addr);
824} 824}
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 58477ec3d66d..7c8686709636 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -168,7 +168,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
168 return -1; 168 return -1;
169 } 169 }
170 170
171 __flush_tlb_one(f->addr); 171 __flush_tlb_one_kernel(f->addr);
172 return 0; 172 return 0;
173} 173}
174 174
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index c3c5274410a9..9bb7f0ab9fe6 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -63,7 +63,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
63 * It's enough to flush this one mapping. 63 * It's enough to flush this one mapping.
64 * (PGE mappings get flushed as well) 64 * (PGE mappings get flushed as well)
65 */ 65 */
66 __flush_tlb_one(vaddr); 66 __flush_tlb_one_kernel(vaddr);
67} 67}
68 68
69unsigned long __FIXADDR_TOP = 0xfffff000; 69unsigned long __FIXADDR_TOP = 0xfffff000;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 8dcc0607f805..7f1a51399674 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -498,7 +498,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
498 * flush that changes context.tlb_gen from 2 to 3. If they get 498 * flush that changes context.tlb_gen from 2 to 3. If they get
499 * processed on this CPU in reverse order, we'll see 499 * processed on this CPU in reverse order, we'll see
500 * local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL. 500 * local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
501 * If we were to use __flush_tlb_single() and set local_tlb_gen to 501 * If we were to use __flush_tlb_one_user() and set local_tlb_gen to
502 * 3, we'd be break the invariant: we'd update local_tlb_gen above 502 * 3, we'd be break the invariant: we'd update local_tlb_gen above
503 * 1 without the full flush that's needed for tlb_gen 2. 503 * 1 without the full flush that's needed for tlb_gen 2.
504 * 504 *
@@ -519,7 +519,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
519 519
520 addr = f->start; 520 addr = f->start;
521 while (addr < f->end) { 521 while (addr < f->end) {
522 __flush_tlb_single(addr); 522 __flush_tlb_one_user(addr);
523 addr += PAGE_SIZE; 523 addr += PAGE_SIZE;
524 } 524 }
525 if (local) 525 if (local)
@@ -666,7 +666,7 @@ static void do_kernel_range_flush(void *info)
666 666
667 /* flush range by one by one 'invlpg' */ 667 /* flush range by one by one 'invlpg' */
668 for (addr = f->start; addr < f->end; addr += PAGE_SIZE) 668 for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
669 __flush_tlb_one(addr); 669 __flush_tlb_one_kernel(addr);
670} 670}
671 671
672void flush_tlb_kernel_range(unsigned long start, unsigned long end) 672void flush_tlb_kernel_range(unsigned long start, unsigned long end)
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index c2e9285d1bf1..db77e087adaf 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
299 local_flush_tlb(); 299 local_flush_tlb();
300 stat->d_alltlb++; 300 stat->d_alltlb++;
301 } else { 301 } else {
302 __flush_tlb_single(msg->address); 302 __flush_tlb_one_user(msg->address);
303 stat->d_onetlb++; 303 stat->d_onetlb++;
304 } 304 }
305 stat->d_requestee++; 305 stat->d_requestee++;
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index d85076223a69..aae88fec9941 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1300,12 +1300,12 @@ static void xen_flush_tlb(void)
1300 preempt_enable(); 1300 preempt_enable();
1301} 1301}
1302 1302
1303static void xen_flush_tlb_single(unsigned long addr) 1303static void xen_flush_tlb_one_user(unsigned long addr)
1304{ 1304{
1305 struct mmuext_op *op; 1305 struct mmuext_op *op;
1306 struct multicall_space mcs; 1306 struct multicall_space mcs;
1307 1307
1308 trace_xen_mmu_flush_tlb_single(addr); 1308 trace_xen_mmu_flush_tlb_one_user(addr);
1309 1309
1310 preempt_disable(); 1310 preempt_disable();
1311 1311
@@ -2370,7 +2370,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2370 2370
2371 .flush_tlb_user = xen_flush_tlb, 2371 .flush_tlb_user = xen_flush_tlb,
2372 .flush_tlb_kernel = xen_flush_tlb, 2372 .flush_tlb_kernel = xen_flush_tlb,
2373 .flush_tlb_single = xen_flush_tlb_single, 2373 .flush_tlb_one_user = xen_flush_tlb_one_user,
2374 .flush_tlb_others = xen_flush_tlb_others, 2374 .flush_tlb_others = xen_flush_tlb_others,
2375 2375
2376 .pgd_alloc = xen_pgd_alloc, 2376 .pgd_alloc = xen_pgd_alloc,
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 77c959cf81e7..7a43b2ae19f1 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -122,6 +122,8 @@ void __init xen_smp_cpus_done(unsigned int max_cpus)
122 122
123 if (xen_hvm_domain()) 123 if (xen_hvm_domain())
124 native_smp_cpus_done(max_cpus); 124 native_smp_cpus_done(max_cpus);
125 else
126 calculate_max_logical_packages();
125 127
126 if (xen_have_vcpu_info_placement) 128 if (xen_have_vcpu_info_placement)
127 return; 129 return;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index df93102e2149..357492712b0e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3164,6 +3164,7 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
3164 cpu_relax(); 3164 cpu_relax();
3165 } 3165 }
3166 3166
3167 __set_current_state(TASK_RUNNING);
3167 return false; 3168 return false;
3168} 3169}
3169 3170
diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c
index a965b9d80559..ded148783303 100644
--- a/crypto/sha3_generic.c
+++ b/crypto/sha3_generic.c
@@ -20,6 +20,20 @@
20#include <crypto/sha3.h> 20#include <crypto/sha3.h>
21#include <asm/unaligned.h> 21#include <asm/unaligned.h>
22 22
23/*
24 * On some 32-bit architectures (mn10300 and h8300), GCC ends up using
25 * over 1 KB of stack if we inline the round calculation into the loop
26 * in keccakf(). On the other hand, on 64-bit architectures with plenty
27 * of [64-bit wide] general purpose registers, not inlining it severely
28 * hurts performance. So let's use 64-bitness as a heuristic to decide
29 * whether to inline or not.
30 */
31#ifdef CONFIG_64BIT
32#define SHA3_INLINE inline
33#else
34#define SHA3_INLINE noinline
35#endif
36
23#define KECCAK_ROUNDS 24 37#define KECCAK_ROUNDS 24
24 38
25static const u64 keccakf_rndc[24] = { 39static const u64 keccakf_rndc[24] = {
@@ -35,111 +49,115 @@ static const u64 keccakf_rndc[24] = {
35 49
36/* update the state with given number of rounds */ 50/* update the state with given number of rounds */
37 51
38static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25]) 52static SHA3_INLINE void keccakf_round(u64 st[25])
39{ 53{
40 u64 t[5], tt, bc[5]; 54 u64 t[5], tt, bc[5];
41 int round;
42 55
43 for (round = 0; round < KECCAK_ROUNDS; round++) { 56 /* Theta */
57 bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
58 bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
59 bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
60 bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
61 bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
62
63 t[0] = bc[4] ^ rol64(bc[1], 1);
64 t[1] = bc[0] ^ rol64(bc[2], 1);
65 t[2] = bc[1] ^ rol64(bc[3], 1);
66 t[3] = bc[2] ^ rol64(bc[4], 1);
67 t[4] = bc[3] ^ rol64(bc[0], 1);
68
69 st[0] ^= t[0];
70
71 /* Rho Pi */
72 tt = st[1];
73 st[ 1] = rol64(st[ 6] ^ t[1], 44);
74 st[ 6] = rol64(st[ 9] ^ t[4], 20);
75 st[ 9] = rol64(st[22] ^ t[2], 61);
76 st[22] = rol64(st[14] ^ t[4], 39);
77 st[14] = rol64(st[20] ^ t[0], 18);
78 st[20] = rol64(st[ 2] ^ t[2], 62);
79 st[ 2] = rol64(st[12] ^ t[2], 43);
80 st[12] = rol64(st[13] ^ t[3], 25);
81 st[13] = rol64(st[19] ^ t[4], 8);
82 st[19] = rol64(st[23] ^ t[3], 56);
83 st[23] = rol64(st[15] ^ t[0], 41);
84 st[15] = rol64(st[ 4] ^ t[4], 27);
85 st[ 4] = rol64(st[24] ^ t[4], 14);
86 st[24] = rol64(st[21] ^ t[1], 2);
87 st[21] = rol64(st[ 8] ^ t[3], 55);
88 st[ 8] = rol64(st[16] ^ t[1], 45);
89 st[16] = rol64(st[ 5] ^ t[0], 36);
90 st[ 5] = rol64(st[ 3] ^ t[3], 28);
91 st[ 3] = rol64(st[18] ^ t[3], 21);
92 st[18] = rol64(st[17] ^ t[2], 15);
93 st[17] = rol64(st[11] ^ t[1], 10);
94 st[11] = rol64(st[ 7] ^ t[2], 6);
95 st[ 7] = rol64(st[10] ^ t[0], 3);
96 st[10] = rol64( tt ^ t[1], 1);
97
98 /* Chi */
99 bc[ 0] = ~st[ 1] & st[ 2];
100 bc[ 1] = ~st[ 2] & st[ 3];
101 bc[ 2] = ~st[ 3] & st[ 4];
102 bc[ 3] = ~st[ 4] & st[ 0];
103 bc[ 4] = ~st[ 0] & st[ 1];
104 st[ 0] ^= bc[ 0];
105 st[ 1] ^= bc[ 1];
106 st[ 2] ^= bc[ 2];
107 st[ 3] ^= bc[ 3];
108 st[ 4] ^= bc[ 4];
109
110 bc[ 0] = ~st[ 6] & st[ 7];
111 bc[ 1] = ~st[ 7] & st[ 8];
112 bc[ 2] = ~st[ 8] & st[ 9];
113 bc[ 3] = ~st[ 9] & st[ 5];
114 bc[ 4] = ~st[ 5] & st[ 6];
115 st[ 5] ^= bc[ 0];
116 st[ 6] ^= bc[ 1];
117 st[ 7] ^= bc[ 2];
118 st[ 8] ^= bc[ 3];
119 st[ 9] ^= bc[ 4];
120
121 bc[ 0] = ~st[11] & st[12];
122 bc[ 1] = ~st[12] & st[13];
123 bc[ 2] = ~st[13] & st[14];
124 bc[ 3] = ~st[14] & st[10];
125 bc[ 4] = ~st[10] & st[11];
126 st[10] ^= bc[ 0];
127 st[11] ^= bc[ 1];
128 st[12] ^= bc[ 2];
129 st[13] ^= bc[ 3];
130 st[14] ^= bc[ 4];
131
132 bc[ 0] = ~st[16] & st[17];
133 bc[ 1] = ~st[17] & st[18];
134 bc[ 2] = ~st[18] & st[19];
135 bc[ 3] = ~st[19] & st[15];
136 bc[ 4] = ~st[15] & st[16];
137 st[15] ^= bc[ 0];
138 st[16] ^= bc[ 1];
139 st[17] ^= bc[ 2];
140 st[18] ^= bc[ 3];
141 st[19] ^= bc[ 4];
142
143 bc[ 0] = ~st[21] & st[22];
144 bc[ 1] = ~st[22] & st[23];
145 bc[ 2] = ~st[23] & st[24];
146 bc[ 3] = ~st[24] & st[20];
147 bc[ 4] = ~st[20] & st[21];
148 st[20] ^= bc[ 0];
149 st[21] ^= bc[ 1];
150 st[22] ^= bc[ 2];
151 st[23] ^= bc[ 3];
152 st[24] ^= bc[ 4];
153}
44 154
45 /* Theta */ 155static void __optimize("O3") keccakf(u64 st[25])
46 bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20]; 156{
47 bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21]; 157 int round;
48 bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
49 bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
50 bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
51
52 t[0] = bc[4] ^ rol64(bc[1], 1);
53 t[1] = bc[0] ^ rol64(bc[2], 1);
54 t[2] = bc[1] ^ rol64(bc[3], 1);
55 t[3] = bc[2] ^ rol64(bc[4], 1);
56 t[4] = bc[3] ^ rol64(bc[0], 1);
57
58 st[0] ^= t[0];
59
60 /* Rho Pi */
61 tt = st[1];
62 st[ 1] = rol64(st[ 6] ^ t[1], 44);
63 st[ 6] = rol64(st[ 9] ^ t[4], 20);
64 st[ 9] = rol64(st[22] ^ t[2], 61);
65 st[22] = rol64(st[14] ^ t[4], 39);
66 st[14] = rol64(st[20] ^ t[0], 18);
67 st[20] = rol64(st[ 2] ^ t[2], 62);
68 st[ 2] = rol64(st[12] ^ t[2], 43);
69 st[12] = rol64(st[13] ^ t[3], 25);
70 st[13] = rol64(st[19] ^ t[4], 8);
71 st[19] = rol64(st[23] ^ t[3], 56);
72 st[23] = rol64(st[15] ^ t[0], 41);
73 st[15] = rol64(st[ 4] ^ t[4], 27);
74 st[ 4] = rol64(st[24] ^ t[4], 14);
75 st[24] = rol64(st[21] ^ t[1], 2);
76 st[21] = rol64(st[ 8] ^ t[3], 55);
77 st[ 8] = rol64(st[16] ^ t[1], 45);
78 st[16] = rol64(st[ 5] ^ t[0], 36);
79 st[ 5] = rol64(st[ 3] ^ t[3], 28);
80 st[ 3] = rol64(st[18] ^ t[3], 21);
81 st[18] = rol64(st[17] ^ t[2], 15);
82 st[17] = rol64(st[11] ^ t[1], 10);
83 st[11] = rol64(st[ 7] ^ t[2], 6);
84 st[ 7] = rol64(st[10] ^ t[0], 3);
85 st[10] = rol64( tt ^ t[1], 1);
86
87 /* Chi */
88 bc[ 0] = ~st[ 1] & st[ 2];
89 bc[ 1] = ~st[ 2] & st[ 3];
90 bc[ 2] = ~st[ 3] & st[ 4];
91 bc[ 3] = ~st[ 4] & st[ 0];
92 bc[ 4] = ~st[ 0] & st[ 1];
93 st[ 0] ^= bc[ 0];
94 st[ 1] ^= bc[ 1];
95 st[ 2] ^= bc[ 2];
96 st[ 3] ^= bc[ 3];
97 st[ 4] ^= bc[ 4];
98
99 bc[ 0] = ~st[ 6] & st[ 7];
100 bc[ 1] = ~st[ 7] & st[ 8];
101 bc[ 2] = ~st[ 8] & st[ 9];
102 bc[ 3] = ~st[ 9] & st[ 5];
103 bc[ 4] = ~st[ 5] & st[ 6];
104 st[ 5] ^= bc[ 0];
105 st[ 6] ^= bc[ 1];
106 st[ 7] ^= bc[ 2];
107 st[ 8] ^= bc[ 3];
108 st[ 9] ^= bc[ 4];
109
110 bc[ 0] = ~st[11] & st[12];
111 bc[ 1] = ~st[12] & st[13];
112 bc[ 2] = ~st[13] & st[14];
113 bc[ 3] = ~st[14] & st[10];
114 bc[ 4] = ~st[10] & st[11];
115 st[10] ^= bc[ 0];
116 st[11] ^= bc[ 1];
117 st[12] ^= bc[ 2];
118 st[13] ^= bc[ 3];
119 st[14] ^= bc[ 4];
120
121 bc[ 0] = ~st[16] & st[17];
122 bc[ 1] = ~st[17] & st[18];
123 bc[ 2] = ~st[18] & st[19];
124 bc[ 3] = ~st[19] & st[15];
125 bc[ 4] = ~st[15] & st[16];
126 st[15] ^= bc[ 0];
127 st[16] ^= bc[ 1];
128 st[17] ^= bc[ 2];
129 st[18] ^= bc[ 3];
130 st[19] ^= bc[ 4];
131
132 bc[ 0] = ~st[21] & st[22];
133 bc[ 1] = ~st[22] & st[23];
134 bc[ 2] = ~st[23] & st[24];
135 bc[ 3] = ~st[24] & st[20];
136 bc[ 4] = ~st[20] & st[21];
137 st[20] ^= bc[ 0];
138 st[21] ^= bc[ 1];
139 st[22] ^= bc[ 2];
140 st[23] ^= bc[ 3];
141 st[24] ^= bc[ 4];
142 158
159 for (round = 0; round < KECCAK_ROUNDS; round++) {
160 keccakf_round(st);
143 /* Iota */ 161 /* Iota */
144 st[0] ^= keccakf_rndc[round]; 162 st[0] ^= keccakf_rndc[round];
145 } 163 }
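
The SHA3_INLINE switch above encodes a reusable pattern: choose inline vs noinline at build time when register pressure and stack depth pull in opposite directions on different word sizes. A minimal userspace rendition (the GCC/Clang attribute and the __LP64__ test are assumptions standing in for the kernel's noinline and CONFIG_64BIT):

    #include <stdio.h>

    /* On 64-bit targets, inlining the hot helper keeps the working set
     * in registers; on 32-bit targets it can blow up stack usage, so
     * force it out of line there instead. */
    #ifdef __LP64__
    #define HOT_INLINE inline
    #else
    #define HOT_INLINE __attribute__((noinline))
    #endif

    static HOT_INLINE unsigned mix(unsigned x)
    {
            return (x << 13) ^ (x >> 7) ^ 0x9e3779b9u;
    }

    int main(void)
    {
            unsigned acc = 1;

            for (int i = 0; i < 24; i++)  /* same shape as the 24 Keccak rounds */
                    acc ^= mix(acc + i);

            printf("%#x\n", acc);
            return 0;
    }
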
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 676c9788e1c8..0dad0bd9327b 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -660,13 +660,15 @@ struct acpi_device *acpi_companion_match(const struct device *dev)
  * acpi_of_match_device - Match device object using the "compatible" property.
  * @adev: ACPI device object to match.
  * @of_match_table: List of device IDs to match against.
+ * @of_id: OF ID if matched
  *
  * If @dev has an ACPI companion which has ACPI_DT_NAMESPACE_HID in its list of
  * identifiers and a _DSD object with the "compatible" property, use that
  * property to match against the given list of identifiers.
  */
 static bool acpi_of_match_device(struct acpi_device *adev,
-				 const struct of_device_id *of_match_table)
+				 const struct of_device_id *of_match_table,
+				 const struct of_device_id **of_id)
 {
 	const union acpi_object *of_compatible, *obj;
 	int i, nval;
@@ -690,8 +692,11 @@ static bool acpi_of_match_device(struct acpi_device *adev,
 		const struct of_device_id *id;
 
 		for (id = of_match_table; id->compatible[0]; id++)
-			if (!strcasecmp(obj->string.pointer, id->compatible))
+			if (!strcasecmp(obj->string.pointer, id->compatible)) {
+				if (of_id)
+					*of_id = id;
 				return true;
+			}
 	}
 
 	return false;
@@ -762,10 +767,11 @@ static bool __acpi_match_device_cls(const struct acpi_device_id *id,
 	return true;
 }
 
-static const struct acpi_device_id *__acpi_match_device(
-	struct acpi_device *device,
-	const struct acpi_device_id *ids,
-	const struct of_device_id *of_ids)
+static bool __acpi_match_device(struct acpi_device *device,
+				const struct acpi_device_id *acpi_ids,
+				const struct of_device_id *of_ids,
+				const struct acpi_device_id **acpi_id,
+				const struct of_device_id **of_id)
 {
 	const struct acpi_device_id *id;
 	struct acpi_hardware_id *hwid;
@@ -775,30 +781,32 @@ static const struct acpi_device_id *__acpi_match_device(
 	 * driver for it.
 	 */
 	if (!device || !device->status.present)
-		return NULL;
+		return false;
 
 	list_for_each_entry(hwid, &device->pnp.ids, list) {
 		/* First, check the ACPI/PNP IDs provided by the caller. */
-		for (id = ids; id->id[0] || id->cls; id++) {
-			if (id->id[0] && !strcmp((char *) id->id, hwid->id))
-				return id;
-			else if (id->cls && __acpi_match_device_cls(id, hwid))
-				return id;
+		if (acpi_ids) {
+			for (id = acpi_ids; id->id[0] || id->cls; id++) {
+				if (id->id[0] && !strcmp((char *)id->id, hwid->id))
+					goto out_acpi_match;
+				if (id->cls && __acpi_match_device_cls(id, hwid))
+					goto out_acpi_match;
+			}
 		}
 
 		/*
 		 * Next, check ACPI_DT_NAMESPACE_HID and try to match the
 		 * "compatible" property if found.
-		 *
-		 * The id returned by the below is not valid, but the only
-		 * caller passing non-NULL of_ids here is only interested in
-		 * whether or not the return value is NULL.
 		 */
-		if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id)
-		    && acpi_of_match_device(device, of_ids))
-			return id;
+		if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id))
+			return acpi_of_match_device(device, of_ids, of_id);
 	}
-	return NULL;
+	return false;
+
+out_acpi_match:
+	if (acpi_id)
+		*acpi_id = id;
+	return true;
 }
 
 /**
@@ -815,32 +823,29 @@ static const struct acpi_device_id *__acpi_match_device(
 const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids,
 					       const struct device *dev)
 {
-	return __acpi_match_device(acpi_companion_match(dev), ids, NULL);
+	const struct acpi_device_id *id = NULL;
+
+	__acpi_match_device(acpi_companion_match(dev), ids, NULL, &id, NULL);
+	return id;
 }
 EXPORT_SYMBOL_GPL(acpi_match_device);
 
-void *acpi_get_match_data(const struct device *dev)
+const void *acpi_device_get_match_data(const struct device *dev)
 {
 	const struct acpi_device_id *match;
 
-	if (!dev->driver)
-		return NULL;
-
-	if (!dev->driver->acpi_match_table)
-		return NULL;
-
 	match = acpi_match_device(dev->driver->acpi_match_table, dev);
 	if (!match)
 		return NULL;
 
-	return (void *)match->driver_data;
+	return (const void *)match->driver_data;
 }
-EXPORT_SYMBOL_GPL(acpi_get_match_data);
+EXPORT_SYMBOL_GPL(acpi_device_get_match_data);
 
 int acpi_match_device_ids(struct acpi_device *device,
 			  const struct acpi_device_id *ids)
 {
-	return __acpi_match_device(device, ids, NULL) ? 0 : -ENOENT;
+	return __acpi_match_device(device, ids, NULL, NULL, NULL) ? 0 : -ENOENT;
 }
 EXPORT_SYMBOL(acpi_match_device_ids);
 
@@ -849,10 +854,12 @@ bool acpi_driver_match_device(struct device *dev,
 {
 	if (!drv->acpi_match_table)
 		return acpi_of_match_device(ACPI_COMPANION(dev),
-					    drv->of_match_table);
+					    drv->of_match_table,
+					    NULL);
 
-	return !!__acpi_match_device(acpi_companion_match(dev),
-				     drv->acpi_match_table, drv->of_match_table);
+	return __acpi_match_device(acpi_companion_match(dev),
+				   drv->acpi_match_table, drv->of_match_table,
+				   NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(acpi_driver_match_device);
 
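
The net effect of the bus.c changes: __acpi_match_device() now reports its results through optional output pointers instead of returning a possibly invalid table entry, and the renamed acpi_device_get_match_data() drops the dev->driver checks. A hypothetical consumer (foo_probe() and struct foo_cfg are placeholders, not from this patch) would retrieve its per-ID data like so:

	static int foo_probe(struct platform_device *pdev)
	{
		/* Returns the matched entry's driver_data, or NULL. */
		const struct foo_cfg *cfg = device_get_match_data(&pdev->dev);

		if (!cfg)
			return -ENODEV;
		/* ... use cfg ... */
		return 0;
	}
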
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index d9f38c645e4a..30a572956557 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1927,6 +1927,9 @@ static int acpi_ec_suspend_noirq(struct device *dev)
 	    ec->reference_count >= 1)
 		acpi_set_gpe(NULL, ec->gpe, ACPI_GPE_DISABLE);
 
+	if (acpi_sleep_no_ec_events())
+		acpi_ec_enter_noirq(ec);
+
 	return 0;
 }
 
@@ -1934,6 +1937,9 @@ static int acpi_ec_resume_noirq(struct device *dev)
 {
 	struct acpi_ec *ec = acpi_driver_data(to_acpi_device(dev));
 
+	if (acpi_sleep_no_ec_events())
+		acpi_ec_leave_noirq(ec);
+
 	if (ec_no_wakeup && test_bit(EC_FLAGS_STARTED, &ec->flags) &&
 	    ec->reference_count >= 1)
 		acpi_set_gpe(NULL, ec->gpe, ACPI_GPE_ENABLE);
diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 466d1503aba0..5815356ea6ad 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1271,11 +1271,11 @@ static int acpi_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
 	return 0;
 }
 
-static void *
+static const void *
 acpi_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
 				  const struct device *dev)
 {
-	return acpi_get_match_data(dev);
+	return acpi_device_get_match_data(dev);
 }
 
 #define DECLARE_ACPI_FWNODE_OPS(ops) \
diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c
index 89e97d21a89c..9d52743080a4 100644
--- a/drivers/acpi/spcr.c
+++ b/drivers/acpi/spcr.c
@@ -115,6 +115,7 @@ int __init acpi_parse_spcr(bool enable_earlycon, bool enable_console)
 				      table->serial_port.access_width))) {
 	default:
 		pr_err("Unexpected SPCR Access Width. Defaulting to byte size\n");
+		/* fall through */
 	case 8:
 		iotype = "mmio";
 		break;
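
The added comment is functional, not cosmetic: compilers that warn about implicit switch fall-through (e.g. gcc's -Wimplicit-fallthrough) accept a /* fall through */ comment as an annotation that the missing break is intentional. The pattern in miniature:

	switch (width) {
	default:
		pr_err("unexpected width, assuming 8\n");
		/* fall through */
	case 8:
		/* both paths take the 8-bit branch */
		break;
	}
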
diff --git a/drivers/base/core.c b/drivers/base/core.c
index b2261f92f2f1..5847364f25d9 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -310,6 +310,9 @@ static void __device_link_del(struct device_link *link)
 	dev_info(link->consumer, "Dropping the link to %s\n",
 		 dev_name(link->supplier));
 
+	if (link->flags & DL_FLAG_PM_RUNTIME)
+		pm_runtime_drop_link(link->consumer);
+
 	list_del(&link->s_node);
 	list_del(&link->c_node);
 	device_link_free(link);
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index a8ac86e4d79e..6637fc319269 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -321,7 +321,8 @@ void dev_pm_arm_wake_irq(struct wake_irq *wirq)
 		return;
 
 	if (device_may_wakeup(wirq->dev)) {
-		if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)
+		if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
+		    !pm_runtime_status_suspended(wirq->dev))
 			enable_irq(wirq->irq);
 
 		enable_irq_wake(wirq->irq);
@@ -343,7 +344,8 @@ void dev_pm_disarm_wake_irq(struct wake_irq *wirq)
 	if (device_may_wakeup(wirq->dev)) {
 		disable_irq_wake(wirq->irq);
 
-		if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)
+		if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
+		    !pm_runtime_status_suspended(wirq->dev))
 			disable_irq_nosync(wirq->irq);
 	}
 }
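
Both wakeirq hunks add the same guard: if the device is already runtime suspended, its dedicated wake IRQ is being managed by runtime PM and must not be enabled or disabled again on the system sleep path; only the enable_irq_wake()/disable_irq_wake() arming changes. For context, a sketch of how a driver hands such an IRQ to the PM core (error handling trimmed; illustrative, not from this patch):

	/* In probe: */
	ret = dev_pm_set_dedicated_wake_irq(dev, irq);
	if (ret)
		return ret;
	device_init_wakeup(dev, true);
	/* dev_pm_arm_wake_irq()/dev_pm_disarm_wake_irq() then run on the
	 * system suspend/resume path patched above. */
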
diff --git a/drivers/base/property.c b/drivers/base/property.c
index 302236281d83..8f205f6461ed 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -1410,9 +1410,8 @@ int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
 }
 EXPORT_SYMBOL(fwnode_graph_parse_endpoint);
 
-void *device_get_match_data(struct device *dev)
+const void *device_get_match_data(struct device *dev)
 {
-	return fwnode_call_ptr_op(dev_fwnode(dev), device_get_match_data,
-				  dev);
+	return fwnode_call_ptr_op(dev_fwnode(dev), device_get_match_data, dev);
 }
 EXPORT_SYMBOL_GPL(device_get_match_data);
diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index 204afe66de92..3d7a5c149af3 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -203,6 +203,12 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
 	{ }	/* Terminating entry */
 };
 
+static inline void ath3k_log_failed_loading(int err, int len, int size)
+{
+	BT_ERR("Error in firmware loading err = %d, len = %d, size = %d",
+	       err, len, size);
+}
+
 #define USB_REQ_DFU_DNLOAD	1
 #define BULK_SIZE		4096
 #define FW_HDR_SIZE		20
@@ -227,15 +233,16 @@ static int ath3k_load_firmware(struct usb_device *udev,
 		return -ENOMEM;
 	}
 
-	memcpy(send_buf, firmware->data, 20);
+	memcpy(send_buf, firmware->data, FW_HDR_SIZE);
 	err = usb_control_msg(udev, pipe, USB_REQ_DFU_DNLOAD, USB_TYPE_VENDOR,
-			      0, 0, send_buf, 20, USB_CTRL_SET_TIMEOUT);
+			      0, 0, send_buf, FW_HDR_SIZE,
+			      USB_CTRL_SET_TIMEOUT);
 	if (err < 0) {
 		BT_ERR("Can't change to loading configuration err");
 		goto error;
 	}
-	sent += 20;
-	count -= 20;
+	sent += FW_HDR_SIZE;
+	count -= FW_HDR_SIZE;
 
 	pipe = usb_sndbulkpipe(udev, 0x02);
 
@@ -250,8 +257,7 @@ static int ath3k_load_firmware(struct usb_device *udev,
 					&len, 3000);
 
 		if (err || (len != size)) {
-			BT_ERR("Error in firmware loading err = %d,"
-				"len = %d, size = %d", err, len, size);
+			ath3k_log_failed_loading(err, len, size);
 			goto error;
 		}
 
@@ -350,8 +356,7 @@ static int ath3k_load_fwfile(struct usb_device *udev,
 		err = usb_bulk_msg(udev, pipe, send_buf, size,
 					&len, 3000);
 		if (err || (len != size)) {
-			BT_ERR("Error in firmware loading err = %d,"
-				"len = %d, size = %d", err, len, size);
+			ath3k_log_failed_loading(err, len, size);
 			kfree(send_buf);
 			return err;
 		}
@@ -398,7 +403,7 @@ static int ath3k_set_normal_mode(struct usb_device *udev)
 static int ath3k_load_patch(struct usb_device *udev)
 {
 	unsigned char fw_state;
-	char filename[ATH3K_NAME_LEN] = {0};
+	char filename[ATH3K_NAME_LEN];
 	const struct firmware *firmware;
 	struct ath3k_version fw_version;
 	__u32 pt_rom_version, pt_build_version;
@@ -451,7 +456,7 @@ static int ath3k_load_patch(struct usb_device *udev)
 static int ath3k_load_syscfg(struct usb_device *udev)
 {
 	unsigned char fw_state;
-	char filename[ATH3K_NAME_LEN] = {0};
+	char filename[ATH3K_NAME_LEN];
 	const struct firmware *firmware;
 	struct ath3k_version fw_version;
 	int clk_value, ret;
@@ -522,7 +527,6 @@ static int ath3k_probe(struct usb_interface *intf,
 
 	/* load patch and sysconfig files for AR3012 */
 	if (id->driver_info & BTUSB_ATH3012) {
-
 		/* New firmware with patch and sysconfig files already loaded */
 		if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x0001)
 			return -ENODEV;
@@ -565,7 +569,7 @@ static int ath3k_probe(struct usb_interface *intf,
 
 static void ath3k_disconnect(struct usb_interface *intf)
 {
-	BT_DBG("ath3k_disconnect intf %p", intf);
+	BT_DBG("%s intf %p", __func__, intf);
 }
 
 static struct usb_driver ath3k_driver = {
diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c
index b280d466f05b..f6c694a1b9b0 100644
--- a/drivers/bluetooth/btmrvl_main.c
+++ b/drivers/bluetooth/btmrvl_main.c
@@ -183,7 +183,7 @@ static int btmrvl_send_sync_cmd(struct btmrvl_private *priv, u16 opcode,
 		return -EFAULT;
 	}
 
-	skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_ATOMIC);
+	skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_KERNEL);
 	if (!skb) {
 		BT_ERR("No free skb");
 		return -ENOMEM;
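
btmrvl_send_sync_cmd() runs in process context and may sleep, so GFP_KERNEL is the right strength here; GFP_ATOMIC is for contexts that cannot sleep and needlessly dips into emergency reserves. The rule in miniature:

	skb = bt_skb_alloc(len, GFP_KERNEL);	/* process context, may sleep */
	skb = bt_skb_alloc(len, GFP_ATOMIC);	/* IRQ/softirq or spinlock held */
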
diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index 6e2ad748abba..437f080deaab 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -35,6 +35,60 @@
 #define RTL_ROM_LMP_8761A	0x8761
 #define RTL_ROM_LMP_8822B	0x8822
 
+#define IC_MATCH_FL_LMPSUBV	(1 << 0)
+#define IC_MATCH_FL_HCIREV	(1 << 1)
+#define IC_INFO(lmps, hcir) \
+	.match_flags = IC_MATCH_FL_LMPSUBV | IC_MATCH_FL_HCIREV, \
+	.lmp_subver = (lmps), \
+	.hci_rev = (hcir)
+
+struct id_table {
+	__u16 match_flags;
+	__u16 lmp_subver;
+	__u16 hci_rev;
+	bool config_needed;
+	char *fw_name;
+	char *cfg_name;
+};
+
+static const struct id_table ic_id_table[] = {
+	/* 8723B */
+	{ IC_INFO(RTL_ROM_LMP_8723B, 0xb),
+	  .config_needed = false,
+	  .fw_name = "rtl_bt/rtl8723b_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8723b_config.bin" },
+
+	/* 8723D */
+	{ IC_INFO(RTL_ROM_LMP_8723B, 0xd),
+	  .config_needed = true,
+	  .fw_name = "rtl_bt/rtl8723d_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8723d_config.bin" },
+
+	/* 8821A */
+	{ IC_INFO(RTL_ROM_LMP_8821A, 0xa),
+	  .config_needed = false,
+	  .fw_name = "rtl_bt/rtl8821a_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8821a_config.bin" },
+
+	/* 8821C */
+	{ IC_INFO(RTL_ROM_LMP_8821A, 0xc),
+	  .config_needed = false,
+	  .fw_name = "rtl_bt/rtl8821c_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8821c_config.bin" },
+
+	/* 8761A */
+	{ IC_MATCH_FL_LMPSUBV, RTL_ROM_LMP_8761A, 0x0,
+	  .config_needed = false,
+	  .fw_name = "rtl_bt/rtl8761a_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8761a_config.bin" },
+
+	/* 8822B */
+	{ IC_INFO(RTL_ROM_LMP_8822B, 0xb),
+	  .config_needed = true,
+	  .fw_name = "rtl_bt/rtl8822b_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8822b_config.bin" },
+	};
+
 static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version)
 {
 	struct rtl_rom_version_evt *rom_version;
@@ -64,9 +118,9 @@ static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version)
 	return 0;
 }
 
-static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
-				   const struct firmware *fw,
-				   unsigned char **_buf)
+static int rtlbt_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
+				const struct firmware *fw,
+				unsigned char **_buf)
 {
 	const u8 extension_sig[] = { 0x51, 0x04, 0xfd, 0x77 };
 	struct rtl_epatch_header *epatch_info;
@@ -88,6 +142,8 @@ static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
 		{ RTL_ROM_LMP_8821A, 2 },
 		{ RTL_ROM_LMP_8761A, 3 },
 		{ RTL_ROM_LMP_8822B, 8 },
+		{ RTL_ROM_LMP_8723B, 9 },	/* 8723D */
+		{ RTL_ROM_LMP_8821A, 10 },	/* 8821C */
 	};
 
 	ret = rtl_read_rom_version(hdev, &rom_version);
@@ -320,8 +376,8 @@ out:
 	return ret;
 }
 
-static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
-				const char *fw_name)
+static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 hci_rev,
+				u16 lmp_subver)
 {
 	unsigned char *fw_data = NULL;
 	const struct firmware *fw;
@@ -330,39 +386,40 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
 	u8 *cfg_buff = NULL;
 	u8 *tbuff;
 	char *cfg_name = NULL;
-	bool config_needed = false;
+	char *fw_name = NULL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ic_id_table); i++) {
+		if ((ic_id_table[i].match_flags & IC_MATCH_FL_LMPSUBV) &&
+		    (ic_id_table[i].lmp_subver != lmp_subver))
+			continue;
+		if ((ic_id_table[i].match_flags & IC_MATCH_FL_HCIREV) &&
+		    (ic_id_table[i].hci_rev != hci_rev))
+			continue;
 
-	switch (lmp_subver) {
-	case RTL_ROM_LMP_8723B:
-		cfg_name = "rtl_bt/rtl8723b_config.bin";
-		break;
-	case RTL_ROM_LMP_8821A:
-		cfg_name = "rtl_bt/rtl8821a_config.bin";
-		break;
-	case RTL_ROM_LMP_8761A:
-		cfg_name = "rtl_bt/rtl8761a_config.bin";
-		break;
-	case RTL_ROM_LMP_8822B:
-		cfg_name = "rtl_bt/rtl8822b_config.bin";
-		config_needed = true;
-		break;
-	default:
-		BT_ERR("%s: rtl: no config according to lmp_subver %04x",
-		       hdev->name, lmp_subver);
 		break;
 	}
 
+	if (i >= ARRAY_SIZE(ic_id_table)) {
+		BT_ERR("%s: unknown IC info, lmp subver %04x, hci rev %04x",
+		       hdev->name, lmp_subver, hci_rev);
+		return -EINVAL;
+	}
+
+	cfg_name = ic_id_table[i].cfg_name;
+
 	if (cfg_name) {
 		cfg_sz = rtl_load_config(hdev, cfg_name, &cfg_buff);
 		if (cfg_sz < 0) {
 			cfg_sz = 0;
-			if (config_needed)
+			if (ic_id_table[i].config_needed)
 				BT_ERR("Necessary config file %s not found\n",
 				       cfg_name);
 		}
 	} else
 		cfg_sz = 0;
 
+	fw_name = ic_id_table[i].fw_name;
 	bt_dev_info(hdev, "rtl: loading %s", fw_name);
 	ret = request_firmware(&fw, fw_name, &hdev->dev);
 	if (ret < 0) {
@@ -370,7 +427,7 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
 		goto err_req_fw;
 	}
 
-	ret = rtl8723b_parse_firmware(hdev, lmp_subver, fw, &fw_data);
+	ret = rtlbt_parse_firmware(hdev, lmp_subver, fw, &fw_data);
 	if (ret < 0)
 		goto out;
 
@@ -429,7 +486,7 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
 {
 	struct sk_buff *skb;
 	struct hci_rp_read_local_version *resp;
-	u16 lmp_subver;
+	u16 hci_rev, lmp_subver;
 
 	skb = btrtl_read_local_version(hdev);
 	if (IS_ERR(skb))
@@ -441,6 +498,7 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
 		    resp->hci_ver, resp->hci_rev,
 		    resp->lmp_ver, resp->lmp_subver);
 
+	hci_rev = le16_to_cpu(resp->hci_rev);
 	lmp_subver = le16_to_cpu(resp->lmp_subver);
 	kfree_skb(skb);
 
@@ -455,17 +513,10 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
 	case RTL_ROM_LMP_3499:
 		return btrtl_setup_rtl8723a(hdev);
 	case RTL_ROM_LMP_8723B:
-		return btrtl_setup_rtl8723b(hdev, lmp_subver,
-					    "rtl_bt/rtl8723b_fw.bin");
 	case RTL_ROM_LMP_8821A:
-		return btrtl_setup_rtl8723b(hdev, lmp_subver,
-					    "rtl_bt/rtl8821a_fw.bin");
 	case RTL_ROM_LMP_8761A:
-		return btrtl_setup_rtl8723b(hdev, lmp_subver,
-					    "rtl_bt/rtl8761a_fw.bin");
 	case RTL_ROM_LMP_8822B:
-		return btrtl_setup_rtl8723b(hdev, lmp_subver,
-					    "rtl_bt/rtl8822b_fw.bin");
+		return btrtl_setup_rtl8723b(hdev, hci_rev, lmp_subver);
 	default:
 		bt_dev_info(hdev, "rtl: assuming no firmware upload needed");
 		return 0;
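
Summary of the btrtl rework: per-chip switch statements give way to the ic_id_table lookup, and entries that set IC_MATCH_FL_HCIREV can distinguish controllers sharing one LMP subversion, which is what enables the new 8723D and 8821C support. A worked example against the table above:

	/*
	 * lmp_subver = RTL_ROM_LMP_8723B (0x8723), hci_rev = 0xd
	 *   -> "8723D" entry: rtl_bt/rtl8723d_fw.bin, config required
	 * lmp_subver = RTL_ROM_LMP_8761A, any hci_rev
	 *   -> "8761A" entry (IC_MATCH_FL_HCIREV not set, hci_rev ignored)
	 */
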
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 2a55380ad730..c8e9ae6b99e1 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -339,6 +339,7 @@ static const struct usb_device_id blacklist_table[] = {
 
 	/* Intel Bluetooth devices */
 	{ USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW },
+	{ USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW },
 	{ USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR },
 	{ USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL },
 	{ USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL },
@@ -373,6 +374,9 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x13d3, 0x3461), .driver_info = BTUSB_REALTEK },
 	{ USB_DEVICE(0x13d3, 0x3462), .driver_info = BTUSB_REALTEK },
 
+	/* Additional Realtek 8822BE Bluetooth devices */
+	{ USB_DEVICE(0x0b05, 0x185c), .driver_info = BTUSB_REALTEK },
+
 	/* Silicon Wave based devices */
 	{ USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE },
 
@@ -2057,6 +2061,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	case 0x0c:	/* WsP */
 	case 0x11:	/* JfP */
 	case 0x12:	/* ThP */
+	case 0x13:	/* HrP */
+	case 0x14:	/* QnJ, IcP */
 		break;
 	default:
 		BT_ERR("%s: Unsupported Intel hardware variant (%u)",
@@ -2149,6 +2155,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 		break;
 	case 0x11:	/* JfP */
 	case 0x12:	/* ThP */
+	case 0x13:	/* HrP */
+	case 0x14:	/* QnJ, IcP */
 		snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.sfi",
 			 le16_to_cpu(ver.hw_variant),
 			 le16_to_cpu(ver.hw_revision),
@@ -2180,6 +2188,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 		break;
 	case 0x11:	/* JfP */
 	case 0x12:	/* ThP */
+	case 0x13:	/* HrP */
+	case 0x14:	/* QnJ, IcP */
 		snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.ddc",
 			 le16_to_cpu(ver.hw_variant),
 			 le16_to_cpu(ver.hw_revision),
diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c
index 14ae7ee88acb..d568fbd94d6c 100644
--- a/drivers/bluetooth/hci_ath.c
+++ b/drivers/bluetooth/hci_ath.c
@@ -71,12 +71,12 @@ static int ath_wakeup_ar3k(struct tty_struct *tty)
 	/* Clear RTS first */
 	tty->driver->ops->tiocmget(tty);
 	tty->driver->ops->tiocmset(tty, 0x00, TIOCM_RTS);
-	mdelay(20);
+	msleep(20);
 
 	/* Set RTS, wake up board */
 	tty->driver->ops->tiocmget(tty);
 	tty->driver->ops->tiocmset(tty, TIOCM_RTS, 0x00);
-	mdelay(20);
+	msleep(20);
 
 	status = tty->driver->ops->tiocmget(tty);
 	return status;
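
mdelay() busy-waits for the whole interval, which is justified only in atomic context; ath_wakeup_ar3k() runs in process context, so msleep() lets the scheduler do useful work during the 20 ms (the hci_ll hunk below is the same fix). The usual guideline (per Documentation/timers/timers-howto.txt), sketched:

	if (cannot_sleep)			/* illustrative condition */
		mdelay(ms);			/* busy-wait, last resort */
	else if (ms < 20)
		usleep_range(ms * 1000, ms * 1000 + 500);
	else
		msleep(ms);			/* sleeps; may overshoot a bit */
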
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index 1b4417a623a4..2f30dcad96bd 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -650,7 +650,7 @@ static int download_firmware(struct ll_device *lldev)
 			break;
 		case ACTION_DELAY:	/* sleep */
 			bt_dev_info(lldev->hu.hdev, "sleep command in scr");
-			mdelay(((struct bts_action_delay *)action_ptr)->msec);
+			msleep(((struct bts_action_delay *)action_ptr)->msec);
 			break;
 		}
 		len -= (sizeof(struct bts_action) +
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index d1f5bb534e0e..6e9df558325b 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -162,7 +162,7 @@ static int via_rng_init(struct hwrng *rng)
 	/* Enable secondary noise source on CPUs where it is present. */
 
 	/* Nehemiah stepping 8 and higher */
-	if ((c->x86_model == 9) && (c->x86_mask > 7))
+	if ((c->x86_model == 9) && (c->x86_stepping > 7))
 		lo |= VIA_NOISESRC2;
 
 	/* Esther */
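
This hunk and the cpufreq/crypto/EDAC hunks that follow are mechanical fallout of renaming cpuinfo_x86.x86_mask to x86_stepping: the field has always held the CPUID stepping, and the old name wrongly suggested a bitmask. Roughly where the value comes from (a sketch; the real code in arch/x86/kernel/cpu/common.c also folds in extended family/model bits):

	u32 eax = cpuid_eax(0x00000001);

	c->x86_stepping = eax & 0xf;		/* CPUID(1).EAX[3:0] */
	c->x86_model    = (eax >> 4) & 0xf;	/* EAX[7:4] (+ extension) */
	c->x86          = (eax >> 8) & 0xf;	/* EAX[11:8] (+ extension) */
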
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 3a2ca0f79daf..d0c34df0529c 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -629,7 +629,7 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
 	if (c->x86_vendor == X86_VENDOR_INTEL) {
 		if ((c->x86 == 15) &&
 		    (c->x86_model == 6) &&
-		    (c->x86_mask == 8)) {
+		    (c->x86_stepping == 8)) {
 			pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n");
 			return -ENODEV;
 		}
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index 942632a27b50..f730b6528c18 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -775,7 +775,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
 		break;
 
 	case 7:
-		switch (c->x86_mask) {
+		switch (c->x86_stepping) {
 		case 0:
 			longhaul_version = TYPE_LONGHAUL_V1;
 			cpu_model = CPU_SAMUEL2;
@@ -787,7 +787,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
 			break;
 		case 1 ... 15:
 			longhaul_version = TYPE_LONGHAUL_V2;
-			if (c->x86_mask < 8) {
+			if (c->x86_stepping < 8) {
 				cpu_model = CPU_SAMUEL2;
 				cpuname = "C3 'Samuel 2' [C5B]";
 			} else {
@@ -814,7 +814,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
 		numscales = 32;
 		memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults));
 		memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr));
-		switch (c->x86_mask) {
+		switch (c->x86_stepping) {
 		case 0 ... 1:
 			cpu_model = CPU_NEHEMIAH;
 			cpuname = "C3 'Nehemiah A' [C5XLOE]";
diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c
index fd77812313f3..a25741b1281b 100644
--- a/drivers/cpufreq/p4-clockmod.c
+++ b/drivers/cpufreq/p4-clockmod.c
@@ -168,7 +168,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
 #endif
 
 	/* Errata workaround */
-	cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask;
+	cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_stepping;
 	switch (cpuid) {
 	case 0x0f07:
 	case 0x0f0a:
diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
index 80ac313e6c59..302e9ce793a0 100644
--- a/drivers/cpufreq/powernow-k7.c
+++ b/drivers/cpufreq/powernow-k7.c
@@ -131,7 +131,7 @@ static int check_powernow(void)
 		return 0;
 	}
 
-	if ((c->x86_model == 6) && (c->x86_mask == 0)) {
+	if ((c->x86_model == 6) && (c->x86_stepping == 0)) {
 		pr_info("K7 660[A0] core detected, enabling errata workarounds\n");
 		have_a0 = 1;
 	}
diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c
index 41bc5397f4bb..4fa5adf16c70 100644
--- a/drivers/cpufreq/speedstep-centrino.c
+++ b/drivers/cpufreq/speedstep-centrino.c
@@ -37,7 +37,7 @@ struct cpu_id
 {
 	__u8	x86;            /* CPU family */
 	__u8	x86_model;	/* model */
-	__u8	x86_mask;	/* stepping */
+	__u8	x86_stepping;	/* stepping */
 };
 
 enum {
@@ -277,7 +277,7 @@ static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
 {
 	if ((c->x86 == x->x86) &&
 	    (c->x86_model == x->x86_model) &&
-	    (c->x86_mask == x->x86_mask))
+	    (c->x86_stepping == x->x86_stepping))
 		return 1;
 	return 0;
 }
diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c
index 8085ec9000d1..e3a9962ee410 100644
--- a/drivers/cpufreq/speedstep-lib.c
+++ b/drivers/cpufreq/speedstep-lib.c
@@ -272,9 +272,9 @@ unsigned int speedstep_detect_processor(void)
 		ebx = cpuid_ebx(0x00000001);
 		ebx &= 0x000000FF;
 
-		pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
+		pr_debug("ebx value is %x, x86_stepping is %x\n", ebx, c->x86_stepping);
 
-		switch (c->x86_mask) {
+		switch (c->x86_stepping) {
 		case 4:
 			/*
 			 * B-stepping [M-P4-M]
@@ -361,7 +361,7 @@ unsigned int speedstep_detect_processor(void)
 			msr_lo, msr_hi);
 		if ((msr_hi & (1<<18)) &&
 		    (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
-			if (c->x86_mask == 0x01) {
+			if (c->x86_stepping == 0x01) {
 				pr_debug("early PIII version\n");
 				return SPEEDSTEP_CPU_PIII_C_EARLY;
 			} else
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 75d280cb2dc0..e843cf410373 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -228,12 +228,16 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
 		 * without any error (HW optimizations for later
 		 * CAAM eras), then try again.
 		 */
+		if (ret)
+			break;
+
 		rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
 		if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) ||
-		    !(rdsta_val & (1 << sh_idx)))
+		    !(rdsta_val & (1 << sh_idx))) {
 			ret = -EAGAIN;
-		if (ret)
 			break;
+		}
+
 		dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx);
 		/* Clear the contents before recreating the descriptor */
 		memset(desc, 0x00, CAAM_CMD_SZ * 7);
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 4b6642a25df5..1c6cbda56afe 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -512,7 +512,7 @@ static int __init padlock_init(void)
 
 	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
 
-	if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) {
+	if (c->x86 == 6 && c->x86_model == 15 && c->x86_stepping == 2) {
 		ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS;
 		cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS;
 		printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n");
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c
index 0d01d1624252..63d636424161 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c
@@ -28,7 +28,7 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
 	algt = container_of(alg, struct sun4i_ss_alg_template, alg.rng);
 	ss = algt->ss;
 
-	spin_lock(&ss->slock);
+	spin_lock_bh(&ss->slock);
 
 	writel(mode, ss->base + SS_CTL);
 
@@ -51,6 +51,6 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
 	}
 
 	writel(0, ss->base + SS_CTL);
-	spin_unlock(&ss->slock);
-	return dlen;
+	spin_unlock_bh(&ss->slock);
+	return 0;
 }
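
Two fixes in one hunk: the lock becomes spin_lock_bh(), presumably because the same lock can also be taken from softirq context, and the generate() hook now returns 0 on success, as the crypto_rng API expects, rather than the byte count. Caller-side view of that contract:

	/* crypto_rng_get_bytes() returns 0 on success, negative on error;
	 * the data lands in buf and is never signalled via the return. */
	err = crypto_rng_get_bytes(rng, buf, dlen);
	if (err)
		pr_err("PRNG generate failed: %d\n", err);
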
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 9c80e0cb1664..6882fa2f8bad 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1138,6 +1138,10 @@ static int talitos_sg_map(struct device *dev, struct scatterlist *src,
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	bool is_sec1 = has_ftr_sec1(priv);
 
+	if (!src) {
+		to_talitos_ptr(ptr, 0, 0, is_sec1);
+		return 1;
+	}
 	if (sg_count == 1) {
 		to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, is_sec1);
 		return sg_count;
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 8b16ec595fa7..329cb96f886f 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -3147,7 +3147,7 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
 	struct amd64_family_type *fam_type = NULL;
 
 	pvt->ext_model  = boot_cpu_data.x86_model >> 4;
-	pvt->stepping	= boot_cpu_data.x86_mask;
+	pvt->stepping	= boot_cpu_data.x86_stepping;
 	pvt->model	= boot_cpu_data.x86_model;
 	pvt->fam	= boot_cpu_data.x86;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index e2c3c5ec42d1..c53095b3b0fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -568,6 +568,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
 	/* HG _PR3 doesn't seem to work on this A+A weston board */
 	{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
 	{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
+	{ 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
 	{ 0, 0, 0, 0, 0 },
 };
 
573 574
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 909499b73d03..021f722e2481 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -733,6 +733,25 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
 	return ret == 0 ? count : ret;
 }
 
+static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos)
+{
+	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+	struct intel_gvt *gvt = vgpu->gvt;
+	int offset;
+
+	/* Only allow MMIO GGTT entry access */
+	if (index != PCI_BASE_ADDRESS_0)
+		return false;
+
+	offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
+		intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
+
+	return (offset >= gvt->device_info.gtt_start_offset &&
+		offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
+			true : false;
+}
+
 static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
 			size_t count, loff_t *ppos)
 {
@@ -742,7 +761,21 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
 	while (count) {
 		size_t filled;
 
-		if (count >= 4 && !(*ppos % 4)) {
+		/* Only support GGTT entry 8 bytes read */
+		if (count >= 8 && !(*ppos % 8) &&
+		    gtt_entry(mdev, ppos)) {
+			u64 val;
+
+			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+					ppos, false);
+			if (ret <= 0)
+				goto read_err;
+
+			if (copy_to_user(buf, &val, sizeof(val)))
+				goto read_err;
+
+			filled = 8;
+		} else if (count >= 4 && !(*ppos % 4)) {
 			u32 val;
 
 			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
@@ -802,7 +835,21 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev,
 	while (count) {
 		size_t filled;
 
-		if (count >= 4 && !(*ppos % 4)) {
+		/* Only support GGTT entry 8 bytes write */
+		if (count >= 8 && !(*ppos % 8) &&
+		    gtt_entry(mdev, ppos)) {
+			u64 val;
+
+			if (copy_from_user(&val, buf, sizeof(val)))
+				goto write_err;
+
+			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+					ppos, true);
+			if (ret <= 0)
+				goto write_err;
+
+			filled = 8;
+		} else if (count >= 4 && !(*ppos % 4)) {
 			u32 val;
 
 			if (copy_from_user(&val, buf, sizeof(val)))
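
Rationale for the kvmgt hunks: GGTT page-table entries are 64 bits wide, so a guest updating an entry with two 4-byte accesses would transiently expose a half-written PTE; aligned 8-byte accesses to the GGTT range now go through a single intel_vgpu_rw() call. From user space this is just an aligned 64-bit access on the vfio region, roughly (a hypothetical sketch; offsets illustrative):

	/* Read one 64-bit GGTT entry via the vfio region backing the BAR. */
	u64 pte;

	if (pread(device_fd, &pte, sizeof(pte),
		  bar0_region_offset + gtt_offset + index * sizeof(pte)) < 0)
		return -errno;
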
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 73ad6e90e49d..256f1bb522b7 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -118,6 +118,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
 	{RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
 	{RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
 	{RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
+	{RCS, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
 	{RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */
 	{RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */
 	{RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */
diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h
index 7a2511538f34..736bd2bc5127 100644
--- a/drivers/gpu/drm/i915/gvt/trace.h
+++ b/drivers/gpu/drm/i915/gvt/trace.h
@@ -333,7 +333,7 @@ TRACE_EVENT(render_mmio,
 	TP_PROTO(int old_id, int new_id, char *action, unsigned int reg,
 		 unsigned int old_val, unsigned int new_val),
 
-	TP_ARGS(old_id, new_id, action, reg, new_val, old_val),
+	TP_ARGS(old_id, new_id, action, reg, old_val, new_val),
 
 	TP_STRUCT__entry(
 		__field(int, old_id)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 173d0095e3b2..2f5209de0391 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1433,19 +1433,7 @@ void i915_driver_unload(struct drm_device *dev)
 
 	intel_modeset_cleanup(dev);
 
-	/*
-	 * free the memory space allocated for the child device
-	 * config parsed from VBT
-	 */
-	if (dev_priv->vbt.child_dev && dev_priv->vbt.child_dev_num) {
-		kfree(dev_priv->vbt.child_dev);
-		dev_priv->vbt.child_dev = NULL;
-		dev_priv->vbt.child_dev_num = 0;
-	}
-	kfree(dev_priv->vbt.sdvo_lvds_vbt_mode);
-	dev_priv->vbt.sdvo_lvds_vbt_mode = NULL;
-	kfree(dev_priv->vbt.lfp_lvds_vbt_mode);
-	dev_priv->vbt.lfp_lvds_vbt_mode = NULL;
+	intel_bios_cleanup(dev_priv);
 
 	vga_switcheroo_unregister_client(pdev);
 	vga_client_register(pdev, NULL, NULL, NULL);
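
The open-coded VBT teardown moves into intel_bios_cleanup(), declared in the i915_drv.h hunk below, which also gains a dsi.deassert_seq allocation for the new helper to free. The helper's body is not part of this hunk; a plausible shape, inferred from the removed lines (a sketch, not the actual implementation):

	void intel_bios_cleanup(struct drm_i915_private *dev_priv)
	{
		kfree(dev_priv->vbt.child_dev);
		dev_priv->vbt.child_dev = NULL;
		dev_priv->vbt.child_dev_num = 0;
		kfree(dev_priv->vbt.sdvo_lvds_vbt_mode);
		dev_priv->vbt.sdvo_lvds_vbt_mode = NULL;
		kfree(dev_priv->vbt.lfp_lvds_vbt_mode);
		dev_priv->vbt.lfp_lvds_vbt_mode = NULL;
		kfree(dev_priv->vbt.dsi.deassert_seq);
		dev_priv->vbt.dsi.deassert_seq = NULL;
	}
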
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a42deebedb0f..d307429a5ae0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1349,6 +1349,7 @@ struct intel_vbt_data {
 		u32 size;
 		u8 *data;
 		const u8 *sequence[MIPI_SEQ_MAX];
+		u8 *deassert_seq; /* Used by fixup_mipi_sequences() */
 	} dsi;
 
 	int crt_ddc_pin;
@@ -3657,6 +3658,7 @@ extern void intel_i2c_reset(struct drm_i915_private *dev_priv);
 
 /* intel_bios.c */
 void intel_bios_init(struct drm_i915_private *dev_priv);
+void intel_bios_cleanup(struct drm_i915_private *dev_priv);
 bool intel_bios_is_valid_vbt(const void *buf, size_t size);
 bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv);
 bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 648e7536ff51..0c963fcf31ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -803,7 +803,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 
 	case I915_CONTEXT_PARAM_PRIORITY:
 		{
-			int priority = args->value;
+			s64 priority = args->value;
 
 			if (args->size)
 				ret = -EINVAL;
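
args->value is a 64-bit uapi field, so storing it in an int truncates before the priority range check runs. One line shows the hazard the s64 fixes:

	u64 value = 0x7fff00000000ull;	/* far outside any valid priority */
	int truncated = value;		/* == 0: would sail past the check */
	s64 kept = value;		/* preserved: the bounds check rejects it */
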
diff --git a/drivers/gpu/drm/i915/i915_oa_cflgt3.c b/drivers/gpu/drm/i915/i915_oa_cflgt3.c
index 42ff06fe54a3..792facdb6702 100644
--- a/drivers/gpu/drm/i915/i915_oa_cflgt3.c
+++ b/drivers/gpu/drm/i915/i915_oa_cflgt3.c
@@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
 void
 i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv)
 {
-	strncpy(dev_priv->perf.oa.test_config.uuid,
+	strlcpy(dev_priv->perf.oa.test_config.uuid,
 		"577e8e2c-3fa0-4875-8743-3538d585e3b0",
-		UUID_STRING_LEN);
+		sizeof(dev_priv->perf.oa.test_config.uuid));
 	dev_priv->perf.oa.test_config.id = 1;
 
 	dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa;
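
strncpy() does not NUL-terminate when the source is at least as long as the bound, and a UUID string is exactly UUID_STRING_LEN (36) characters, so the old code could leave the uuid buffer unterminated. strlcpy() with sizeof() always terminates; the same fix repeats for the CNL config below. In miniature:

	char uuid[UUID_STRING_LEN + 1];

	strncpy(uuid, src, UUID_STRING_LEN);	/* may leave uuid unterminated */
	strlcpy(uuid, src, sizeof(uuid));	/* always NUL-terminates */
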
diff --git a/drivers/gpu/drm/i915/i915_oa_cnl.c b/drivers/gpu/drm/i915/i915_oa_cnl.c
index ff0ac3627cc4..ba9140c87cc0 100644
--- a/drivers/gpu/drm/i915/i915_oa_cnl.c
+++ b/drivers/gpu/drm/i915/i915_oa_cnl.c
@@ -96,9 +96,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
 void
 i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv)
 {
-	strncpy(dev_priv->perf.oa.test_config.uuid,
+	strlcpy(dev_priv->perf.oa.test_config.uuid,
 		"db41edd4-d8e7-4730-ad11-b9a2d6833503",
-		UUID_STRING_LEN);
+		sizeof(dev_priv->perf.oa.test_config.uuid));
 	dev_priv->perf.oa.test_config.id = 1;
 
 	dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa;
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 55a8a1e29424..0e9b98c32b62 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -285,26 +285,41 @@ static u64 count_interrupts(struct drm_i915_private *i915)
 	return sum;
 }
 
-static void i915_pmu_event_destroy(struct perf_event *event)
+static void engine_event_destroy(struct perf_event *event)
 {
-	WARN_ON(event->parent);
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct intel_engine_cs *engine;
+
+	engine = intel_engine_lookup_user(i915,
+					  engine_event_class(event),
+					  engine_event_instance(event));
+	if (WARN_ON_ONCE(!engine))
+		return;
+
+	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
+	    intel_engine_supports_stats(engine))
+		intel_disable_engine_stats(engine);
 }
 
-static int engine_event_init(struct perf_event *event)
+static void i915_pmu_event_destroy(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	WARN_ON(event->parent);
 
-	if (!intel_engine_lookup_user(i915, engine_event_class(event),
-				      engine_event_instance(event)))
-		return -ENODEV;
+	if (is_engine_event(event))
+		engine_event_destroy(event);
+}
 
-	switch (engine_event_sample(event)) {
+static int
+engine_event_status(struct intel_engine_cs *engine,
+		    enum drm_i915_pmu_engine_sample sample)
+{
+	switch (sample) {
 	case I915_SAMPLE_BUSY:
 	case I915_SAMPLE_WAIT:
 		break;
 	case I915_SAMPLE_SEMA:
-		if (INTEL_GEN(i915) < 6)
+		if (INTEL_GEN(engine->i915) < 6)
 			return -ENODEV;
 		break;
 	default:
@@ -314,6 +329,30 @@ static int engine_event_init(struct perf_event *event)
 	return 0;
 }
 
+static int engine_event_init(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct intel_engine_cs *engine;
+	u8 sample;
+	int ret;
+
+	engine = intel_engine_lookup_user(i915, engine_event_class(event),
+					  engine_event_instance(event));
+	if (!engine)
+		return -ENODEV;
+
+	sample = engine_event_sample(event);
+	ret = engine_event_status(engine, sample);
+	if (ret)
+		return ret;
+
+	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
+		ret = intel_enable_engine_stats(engine);
+
+	return ret;
+}
+
 static int i915_pmu_event_init(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -370,7 +409,94 @@ static int i915_pmu_event_init(struct perf_event *event)
 	return 0;
 }
 
-static u64 __i915_pmu_event_read(struct perf_event *event)
+static u64 __get_rc6(struct drm_i915_private *i915)
+{
+	u64 val;
+
+	val = intel_rc6_residency_ns(i915,
+				     IS_VALLEYVIEW(i915) ?
+				     VLV_GT_RENDER_RC6 :
+				     GEN6_GT_GFX_RC6);
+
+	if (HAS_RC6p(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+	if (HAS_RC6pp(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+	return val;
+}
+
+static u64 get_rc6(struct drm_i915_private *i915, bool locked)
+{
+#if IS_ENABLED(CONFIG_PM)
+	unsigned long flags;
+	u64 val;
+
+	if (intel_runtime_pm_get_if_in_use(i915)) {
+		val = __get_rc6(i915);
+		intel_runtime_pm_put(i915);
+
+		/*
+		 * If we are coming back from being runtime suspended we must
+		 * be careful not to report a larger value than returned
+		 * previously.
+		 */
+
+		if (!locked)
+			spin_lock_irqsave(&i915->pmu.lock, flags);
+
+		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
+		} else {
+			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
+		}
+
+		if (!locked)
+			spin_unlock_irqrestore(&i915->pmu.lock, flags);
+	} else {
+		struct pci_dev *pdev = i915->drm.pdev;
+		struct device *kdev = &pdev->dev;
+		unsigned long flags2;
+
+		/*
+		 * We are runtime suspended.
+		 *
+		 * Report the delta from when the device was suspended to now,
+		 * on top of the last known real value, as the approximated RC6
+		 * counter value.
+		 */
+		if (!locked)
+			spin_lock_irqsave(&i915->pmu.lock, flags);
+
+		spin_lock_irqsave(&kdev->power.lock, flags2);
+
+		if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
+			i915->pmu.suspended_jiffies_last =
+						kdev->power.suspended_jiffies;
+
+		val = kdev->power.suspended_jiffies -
+		      i915->pmu.suspended_jiffies_last;
+		val += jiffies - kdev->power.accounting_timestamp;
+
+		spin_unlock_irqrestore(&kdev->power.lock, flags2);
+
+		val = jiffies_to_nsecs(val);
+		val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
+		i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+
+		if (!locked)
+			spin_unlock_irqrestore(&i915->pmu.lock, flags);
+	}
+
+	return val;
+#else
+	return __get_rc6(i915);
+#endif
+}
+
+static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
 {
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
@@ -387,7 +513,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 		if (WARN_ON_ONCE(!engine)) {
 			/* Do nothing */
 		} else if (sample == I915_SAMPLE_BUSY &&
-			   engine->pmu.busy_stats) {
+			   intel_engine_supports_stats(engine)) {
 			val = ktime_to_ns(intel_engine_get_busy_time(engine));
 		} else {
 			val = engine->pmu.sample[sample].cur;
@@ -408,18 +534,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 			val = count_interrupts(i915);
 			break;
 		case I915_PMU_RC6_RESIDENCY:
-			intel_runtime_pm_get(i915);
-			val = intel_rc6_residency_ns(i915,
-						     IS_VALLEYVIEW(i915) ?
-						     VLV_GT_RENDER_RC6 :
-						     GEN6_GT_GFX_RC6);
-			if (HAS_RC6p(i915))
-				val += intel_rc6_residency_ns(i915,
-							      GEN6_GT_GFX_RC6p);
-			if (HAS_RC6pp(i915))
-				val += intel_rc6_residency_ns(i915,
-							      GEN6_GT_GFX_RC6pp);
-			intel_runtime_pm_put(i915);
+			val = get_rc6(i915, locked);
 			break;
 		}
 	}
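
The removed block is replaced by get_rc6() above, which copes with runtime PM: while the GPU is runtime suspended, RC6 residency is approximated as the last real reading plus the time spent suspended, and later real readings are clamped so the counter perf sees never moves backwards. The invariant in miniature (field names as in the hunk; illustrative only):

	if (real >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
		pmu->sample[__I915_SAMPLE_RC6].cur = real;	/* trust hw */
	} else {
		real = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
	}
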
@@ -434,7 +549,7 @@ static void i915_pmu_event_read(struct perf_event *event)
 
 again:
 	prev = local64_read(&hwc->prev_count);
-	new = __i915_pmu_event_read(event);
+	new = __i915_pmu_event_read(event, false);
 
 	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
 		goto again;
@@ -442,12 +557,6 @@ again:
 	local64_add(new - prev, &event->count);
 }
 
-static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
-{
-	return intel_engine_supports_stats(engine) &&
-	       (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
-}
-
 static void i915_pmu_enable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -487,21 +596,7 @@ static void i915_pmu_enable(struct perf_event *event)
 
 		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
-		if (engine->pmu.enable_count[sample]++ == 0) {
-			/*
-			 * Enable engine busy stats tracking if needed or
-			 * alternatively cancel the scheduled disable.
-			 *
-			 * If the delayed disable was pending, cancel it and
-			 * in this case do not enable since it already is.
-			 */
-			if (engine_needs_busy_stats(engine) &&
-			    !engine->pmu.busy_stats) {
-				engine->pmu.busy_stats = true;
-				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
-					intel_enable_engine_stats(engine);
-			}
-		}
+		engine->pmu.enable_count[sample]++;
 	}
 
 	/*
@@ -509,19 +604,11 @@ static void i915_pmu_enable(struct perf_event *event)
 	 * for all listeners. Even when the event was already enabled and has
 	 * an existing non-zero value.
 	 */
-	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
+	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true));
 
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
 }
 
-static void __disable_busy_stats(struct work_struct *work)
-{
-	struct intel_engine_cs *engine =
-	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
-
-	intel_disable_engine_stats(engine);
-}
-
 static void i915_pmu_disable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -545,26 +632,8 @@ static void i915_pmu_disable(struct perf_event *event)
 		 * Decrement the reference count and clear the enabled
 		 * bitmask when the last listener on an event goes away.
 		 */
-		if (--engine->pmu.enable_count[sample] == 0) {
+		if (--engine->pmu.enable_count[sample] == 0)
 			engine->pmu.enable &= ~BIT(sample);
-			if (!engine_needs_busy_stats(engine) &&
-			    engine->pmu.busy_stats) {
-				engine->pmu.busy_stats = false;
-				/*
-				 * We request a delayed disable to handle the
-				 * rapid on/off cycles on events, which can
-				 * happen when tools like perf stat start, in a
-				 * nicer way.
-				 *
-				 * In addition, this also helps with busy stats
-				 * accuracy with background CPU offline/online
-				 * migration events.
-				 */
-				queue_delayed_work(system_wq,
-						   &engine->pmu.disable_busy_stats,
-						   round_jiffies_up_relative(HZ));
-			}
-		}
 	}
 
 	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
@@ -797,8 +866,6 @@ static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
 
 void i915_pmu_register(struct drm_i915_private *i915)
 {
800 struct intel_engine_cs *engine;
801 enum intel_engine_id id;
802 int ret; 869 int ret;
803 870
804 if (INTEL_GEN(i915) <= 2) { 871 if (INTEL_GEN(i915) <= 2) {
@@ -820,10 +887,6 @@ void i915_pmu_register(struct drm_i915_private *i915)
820 hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 887 hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
821 i915->pmu.timer.function = i915_sample; 888 i915->pmu.timer.function = i915_sample;
822 889
823 for_each_engine(engine, i915, id)
824 INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
825 __disable_busy_stats);
826
827 ret = perf_pmu_register(&i915->pmu.base, "i915", -1); 890 ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
828 if (ret) 891 if (ret)
829 goto err; 892 goto err;
@@ -843,9 +906,6 @@ err:
843 906
844void i915_pmu_unregister(struct drm_i915_private *i915) 907void i915_pmu_unregister(struct drm_i915_private *i915)
845{ 908{
846 struct intel_engine_cs *engine;
847 enum intel_engine_id id;
848
849 if (!i915->pmu.base.event_init) 909 if (!i915->pmu.base.event_init)
850 return; 910 return;
851 911
@@ -853,11 +913,6 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
853 913
854 hrtimer_cancel(&i915->pmu.timer); 914 hrtimer_cancel(&i915->pmu.timer);
855 915
856 for_each_engine(engine, i915, id) {
857 GEM_BUG_ON(engine->pmu.busy_stats);
858 flush_delayed_work(&engine->pmu.disable_busy_stats);
859 }
860
861 i915_pmu_unregister_cpuhp_state(i915); 916 i915_pmu_unregister_cpuhp_state(i915);
862 917
863 perf_pmu_unregister(&i915->pmu.base); 918 perf_pmu_unregister(&i915->pmu.base);
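
The runtime-suspended branch above estimates RC6 on top of the last real value, and the awake branch then refuses to report anything smaller than an estimate it has already handed out. A minimal userspace sketch of that monotonicity guard, with hypothetical names standing in for the pmu sample fields:

#include <stdint.h>

struct rc6_state {
        uint64_t estimated;     /* last value reported while suspended, 0 if none */
};

/*
 * Awake path: prefer the real hardware counter, but never report less
 * than a previously returned estimate, so the counter stays monotonic.
 */
static uint64_t rc6_report_awake(struct rc6_state *s, uint64_t hw_val)
{
        if (hw_val >= s->estimated) {
                s->estimated = 0;       /* counter caught up; drop the estimate */
                return hw_val;
        }
        return s->estimated;
}

Once the hardware counter catches up, the estimate is cleared and real readings flow through again, so the two branches converge rather than drift apart.
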
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 40c154d13565..bb62df15afa4 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -27,6 +27,8 @@
27enum { 27enum {
28 __I915_SAMPLE_FREQ_ACT = 0, 28 __I915_SAMPLE_FREQ_ACT = 0,
29 __I915_SAMPLE_FREQ_REQ, 29 __I915_SAMPLE_FREQ_REQ,
30 __I915_SAMPLE_RC6,
31 __I915_SAMPLE_RC6_ESTIMATED,
30 __I915_NUM_PMU_SAMPLERS 32 __I915_NUM_PMU_SAMPLERS
31}; 33};
32 34
@@ -94,6 +96,10 @@ struct i915_pmu {
94 * struct intel_engine_cs. 96 * struct intel_engine_cs.
95 */ 97 */
96 struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS]; 98 struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
99 /**
100 * @suspended_jiffies_last: Cached suspend time from PM core.
101 */
102 unsigned long suspended_jiffies_last;
97}; 103};
98 104
99#ifdef CONFIG_PERF_EVENTS 105#ifdef CONFIG_PERF_EVENTS
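
For reference, the event-read loop earlier in this diff publishes each new sample with a compare-and-swap and adds only the delta to the event count. A self-contained C11 sketch of the same retry pattern, using <stdatomic.h> as a stand-in for the kernel's local64_* helpers (all names here are hypothetical):

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t prev_count;     /* last published sample */
static _Atomic uint64_t event_count;    /* accumulated deltas */

static void event_read(uint64_t sample)
{
        uint64_t prev = atomic_load(&prev_count);

        /*
         * On failure the CAS reloads prev with the current value, so the
         * loop retries against whatever a racing reader just published.
         */
        while (!atomic_compare_exchange_weak(&prev_count, &prev, sample))
                ;
        atomic_fetch_add(&event_count, sample - prev);
}
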
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index f7f771749e48..b49a2df44430 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -947,6 +947,86 @@ static int goto_next_sequence_v3(const u8 *data, int index, int total)
947 return 0; 947 return 0;
948} 948}
949 949
950/*
951 * Get the length of the prefixed deassert fragment from a v1 init OTP sequence:
952 * skip all delay + gpio operands and stop at the first DSI packet op.
953 */
954static int get_init_otp_deassert_fragment_len(struct drm_i915_private *dev_priv)
955{
956 const u8 *data = dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP];
957 int index, len;
958
959 if (WARN_ON(!data || dev_priv->vbt.dsi.seq_version != 1))
960 return 0;
961
962 /* index = 1 to skip sequence byte */
963 for (index = 1; data[index] != MIPI_SEQ_ELEM_END; index += len) {
964 switch (data[index]) {
965 case MIPI_SEQ_ELEM_SEND_PKT:
966 return index == 1 ? 0 : index;
967 case MIPI_SEQ_ELEM_DELAY:
968 len = 5; /* 1 byte for operand + uint32 */
969 break;
970 case MIPI_SEQ_ELEM_GPIO:
971 len = 3; /* 1 byte for op, 1 for gpio_nr, 1 for value */
972 break;
973 default:
974 return 0;
975 }
976 }
977
978 return 0;
979}
980
981/*
982 * Some v1 VBT MIPI sequences do the deassert in the init OTP sequence.
983 * The deassert must be done before calling intel_dsi_device_ready, so for
984 * these devices we split the init OTP sequence into a deassert sequence and
985 * the actual init OTP part.
986 */
987static void fixup_mipi_sequences(struct drm_i915_private *dev_priv)
988{
989 u8 *init_otp;
990 int len;
991
992 /* Limit this to VLV for now. */
993 if (!IS_VALLEYVIEW(dev_priv))
994 return;
995
996 /* Limit this to v1 vid-mode sequences */
997 if (dev_priv->vbt.dsi.config->is_cmd_mode ||
998 dev_priv->vbt.dsi.seq_version != 1)
999 return;
1000
1001 /* Only do this if there are otp and assert seqs and no deassert seq */
1002 if (!dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] ||
1003 !dev_priv->vbt.dsi.sequence[MIPI_SEQ_ASSERT_RESET] ||
1004 dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET])
1005 return;
1006
1007 /* The deassert-sequence ends at the first DSI packet */
1008 len = get_init_otp_deassert_fragment_len(dev_priv);
1009 if (!len)
1010 return;
1011
1012 DRM_DEBUG_KMS("Using init OTP fragment to deassert reset\n");
1013
1014 /* Copy the fragment, update seq byte and terminate it */
1015 init_otp = (u8 *)dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP];
1016 dev_priv->vbt.dsi.deassert_seq = kmemdup(init_otp, len + 1, GFP_KERNEL);
1017 if (!dev_priv->vbt.dsi.deassert_seq)
1018 return;
1019 dev_priv->vbt.dsi.deassert_seq[0] = MIPI_SEQ_DEASSERT_RESET;
1020 dev_priv->vbt.dsi.deassert_seq[len] = MIPI_SEQ_ELEM_END;
1021 /* Use the copy for deassert */
1022 dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET] =
1023 dev_priv->vbt.dsi.deassert_seq;
1024 /* Replace the last byte of the fragment with init OTP seq byte */
1025 init_otp[len - 1] = MIPI_SEQ_INIT_OTP;
1026 /* And make MIPI_SEQ_INIT_OTP point to it */
1027 dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1;
1028}
1029
950static void 1030static void
951parse_mipi_sequence(struct drm_i915_private *dev_priv, 1031parse_mipi_sequence(struct drm_i915_private *dev_priv,
952 const struct bdb_header *bdb) 1032 const struct bdb_header *bdb)
@@ -1016,6 +1096,8 @@ parse_mipi_sequence(struct drm_i915_private *dev_priv,
1016 dev_priv->vbt.dsi.size = seq_size; 1096 dev_priv->vbt.dsi.size = seq_size;
1017 dev_priv->vbt.dsi.seq_version = sequence->version; 1097 dev_priv->vbt.dsi.seq_version = sequence->version;
1018 1098
1099 fixup_mipi_sequences(dev_priv);
1100
1019 DRM_DEBUG_DRIVER("MIPI related VBT parsing complete\n"); 1101 DRM_DEBUG_DRIVER("MIPI related VBT parsing complete\n");
1020 return; 1102 return;
1021 1103
@@ -1589,6 +1671,29 @@ out:
1589} 1671}
1590 1672
1591/** 1673/**
1674 * intel_bios_cleanup - Free any resources allocated by intel_bios_init()
1675 * @dev_priv: i915 device instance
1676 */
1677void intel_bios_cleanup(struct drm_i915_private *dev_priv)
1678{
1679 kfree(dev_priv->vbt.child_dev);
1680 dev_priv->vbt.child_dev = NULL;
1681 dev_priv->vbt.child_dev_num = 0;
1682 kfree(dev_priv->vbt.sdvo_lvds_vbt_mode);
1683 dev_priv->vbt.sdvo_lvds_vbt_mode = NULL;
1684 kfree(dev_priv->vbt.lfp_lvds_vbt_mode);
1685 dev_priv->vbt.lfp_lvds_vbt_mode = NULL;
1686 kfree(dev_priv->vbt.dsi.data);
1687 dev_priv->vbt.dsi.data = NULL;
1688 kfree(dev_priv->vbt.dsi.pps);
1689 dev_priv->vbt.dsi.pps = NULL;
1690 kfree(dev_priv->vbt.dsi.config);
1691 dev_priv->vbt.dsi.config = NULL;
1692 kfree(dev_priv->vbt.dsi.deassert_seq);
1693 dev_priv->vbt.dsi.deassert_seq = NULL;
1694}
1695
1696/**
1592 * intel_bios_is_tv_present - is integrated TV present in VBT 1697 * intel_bios_is_tv_present - is integrated TV present in VBT
1593 * @dev_priv: i915 device instance 1698 * @dev_priv: i915 device instance
1594 * 1699 *
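
The fragment scan in get_init_otp_deassert_fragment_len() above walks a byte-coded sequence of fixed-size delay and GPIO elements and stops at the first DSI packet op. A standalone sketch of that walk, with illustrative opcode values (the real ones live in the VBT definitions, which this diff does not show):

#include <stddef.h>

enum {
        ELEM_END      = 0,      /* illustrative values, not the VBT ones */
        ELEM_SEND_PKT = 1,
        ELEM_DELAY    = 2,
        ELEM_GPIO     = 3,
};

static size_t deassert_fragment_len(const unsigned char *seq)
{
        size_t i = 1;   /* index 1: skip the leading sequence byte */

        while (seq[i] != ELEM_END) {
                switch (seq[i]) {
                case ELEM_SEND_PKT:
                        return i == 1 ? 0 : i;  /* empty fragment -> 0 */
                case ELEM_DELAY:
                        i += 5;         /* 1 op byte + 4-byte delay */
                        break;
                case ELEM_GPIO:
                        i += 3;         /* op byte + gpio_nr + value */
                        break;
                default:
                        return 0;       /* unknown element: give up */
                }
        }
        return 0;       /* no packet op found */
}

The caller then duplicates that prefix as a synthetic deassert sequence and rewrites the original so it starts at the first packet op.
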
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index bd40fea16b4f..f54ddda9fdad 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -594,29 +594,16 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
594 spin_unlock_irq(&b->rb_lock); 594 spin_unlock_irq(&b->rb_lock);
595} 595}
596 596
597static bool signal_valid(const struct drm_i915_gem_request *request)
598{
599 return intel_wait_check_request(&request->signaling.wait, request);
600}
601
602static bool signal_complete(const struct drm_i915_gem_request *request) 597static bool signal_complete(const struct drm_i915_gem_request *request)
603{ 598{
604 if (!request) 599 if (!request)
605 return false; 600 return false;
606 601
607 /* If another process served as the bottom-half it may have already 602 /*
608 * signalled that this wait is already completed. 603 * Carefully check if the request is complete, giving time for the
609 */
610 if (intel_wait_complete(&request->signaling.wait))
611 return signal_valid(request);
612
613 /* Carefully check if the request is complete, giving time for the
614 * seqno to be visible or if the GPU hung. 604 * seqno to be visible or if the GPU hung.
615 */ 605 */
616 if (__i915_request_irq_complete(request)) 606 return __i915_request_irq_complete(request);
617 return true;
618
619 return false;
620} 607}
621 608
622static struct drm_i915_gem_request *to_signaler(struct rb_node *rb) 609static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
@@ -659,9 +646,13 @@ static int intel_breadcrumbs_signaler(void *arg)
659 request = i915_gem_request_get_rcu(request); 646 request = i915_gem_request_get_rcu(request);
660 rcu_read_unlock(); 647 rcu_read_unlock();
661 if (signal_complete(request)) { 648 if (signal_complete(request)) {
662 local_bh_disable(); 649 if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
663 dma_fence_signal(&request->fence); 650 &request->fence.flags)) {
664 local_bh_enable(); /* kick start the tasklets */ 651 local_bh_disable();
652 dma_fence_signal(&request->fence);
653 GEM_BUG_ON(!i915_gem_request_completed(request));
654 local_bh_enable(); /* kick start the tasklets */
655 }
665 656
666 spin_lock_irq(&b->rb_lock); 657 spin_lock_irq(&b->rb_lock);
667 658
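
The signaler change above adds a cheap flag test so dma_fence_signal() and its bottom-half kick run only once per fence. A tiny sketch of the guard, using a C11 atomic as a stand-in for the fence flag word:

#include <stdatomic.h>
#include <stdbool.h>

#define SIGNALED_BIT 0ul

static _Atomic unsigned long fence_flags;

/*
 * Racy but safe: a stale false answer only means we take the locked
 * signalling path, which itself tolerates an already-signalled fence.
 */
static bool fence_already_signaled(void)
{
        return atomic_load(&fence_flags) & (1ul << SIGNALED_BIT);
}
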
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 5dc118f26b51..1704c8897afd 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -1952,6 +1952,14 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
1952 if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) 1952 if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9)
1953 min_cdclk = max(2 * 96000, min_cdclk); 1953 min_cdclk = max(2 * 96000, min_cdclk);
1954 1954
1955 /*
1956 * On Valleyview some DSI panels lose (v|h)sync when the clock is lower
1957 * than 320000 kHz.
1958 */
1959 if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) &&
1960 IS_VALLEYVIEW(dev_priv))
1961 min_cdclk = max(320000, min_cdclk);
1962
1955 if (min_cdclk > dev_priv->max_cdclk_freq) { 1963 if (min_cdclk > dev_priv->max_cdclk_freq) {
1956 DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n", 1964 DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n",
1957 min_cdclk, dev_priv->max_cdclk_freq); 1965 min_cdclk, dev_priv->max_cdclk_freq);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index d790bdc227ff..fa960cfd2764 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1458,7 +1458,9 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
1458 struct drm_i915_private *dev_priv = engine->i915; 1458 struct drm_i915_private *dev_priv = engine->i915;
1459 bool idle = true; 1459 bool idle = true;
1460 1460
1461 intel_runtime_pm_get(dev_priv); 1461 /* If the whole device is asleep, the engine must be idle */
1462 if (!intel_runtime_pm_get_if_in_use(dev_priv))
1463 return true;
1462 1464
1463 /* First check that no commands are left in the ring */ 1465 /* First check that no commands are left in the ring */
1464 if ((I915_READ_HEAD(engine) & HEAD_ADDR) != 1466 if ((I915_READ_HEAD(engine) & HEAD_ADDR) !=
@@ -1943,16 +1945,22 @@ intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
1943 */ 1945 */
1944int intel_enable_engine_stats(struct intel_engine_cs *engine) 1946int intel_enable_engine_stats(struct intel_engine_cs *engine)
1945{ 1947{
1948 struct intel_engine_execlists *execlists = &engine->execlists;
1946 unsigned long flags; 1949 unsigned long flags;
1950 int err = 0;
1947 1951
1948 if (!intel_engine_supports_stats(engine)) 1952 if (!intel_engine_supports_stats(engine))
1949 return -ENODEV; 1953 return -ENODEV;
1950 1954
1955 tasklet_disable(&execlists->tasklet);
1951 spin_lock_irqsave(&engine->stats.lock, flags); 1956 spin_lock_irqsave(&engine->stats.lock, flags);
1952 if (engine->stats.enabled == ~0) 1957
1953 goto busy; 1958 if (unlikely(engine->stats.enabled == ~0)) {
1959 err = -EBUSY;
1960 goto unlock;
1961 }
1962
1954 if (engine->stats.enabled++ == 0) { 1963 if (engine->stats.enabled++ == 0) {
1955 struct intel_engine_execlists *execlists = &engine->execlists;
1956 const struct execlist_port *port = execlists->port; 1964 const struct execlist_port *port = execlists->port;
1957 unsigned int num_ports = execlists_num_ports(execlists); 1965 unsigned int num_ports = execlists_num_ports(execlists);
1958 1966
@@ -1967,14 +1975,12 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
1967 if (engine->stats.active) 1975 if (engine->stats.active)
1968 engine->stats.start = engine->stats.enabled_at; 1976 engine->stats.start = engine->stats.enabled_at;
1969 } 1977 }
1970 spin_unlock_irqrestore(&engine->stats.lock, flags);
1971
1972 return 0;
1973 1978
1974busy: 1979unlock:
1975 spin_unlock_irqrestore(&engine->stats.lock, flags); 1980 spin_unlock_irqrestore(&engine->stats.lock, flags);
1981 tasklet_enable(&execlists->tasklet);
1976 1982
1977 return -EBUSY; 1983 return err;
1978} 1984}
1979 1985
1980static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) 1986static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
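
The ring_is_idle() change above relies on the "take a reference only if one is already held" idiom: if the device is runtime suspended, the engine is idle by definition, and waking it just to check would defeat the point. A sketch of that conditional-get pattern using a plain atomic count (a hypothetical stand-in, not the kernel's runtime-PM internals):

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic int usage_count;

/*
 * Succeed only when the device is already in use (count > 0); never
 * wake it. The caller treats failure as "device asleep, hence idle".
 */
static bool pm_get_if_in_use(void)
{
        int old = atomic_load(&usage_count);

        do {
                if (old == 0)
                        return false;
        } while (!atomic_compare_exchange_weak(&usage_count, &old, old + 1));

        return true;
}
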
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c5ff203e42d6..a0e7a6c2a57c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -366,20 +366,6 @@ struct intel_engine_cs {
366 */ 366 */
367#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1) 367#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
368 struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX]; 368 struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
369 /**
370 * @busy_stats: Has enablement of engine stats tracking been
371 * requested.
372 */
373 bool busy_stats;
374 /**
375 * @disable_busy_stats: Work item for busy stats disabling.
376 *
377 * Same as with @enable_busy_stats action, with the difference
378 * that we delay it in case there are rapid enable-disable
379 * actions, which can happen during tool startup (like perf
380 * stat).
381 */
382 struct delayed_work disable_busy_stats;
383 } pmu; 369 } pmu;
384 370
385 /* 371 /*
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
index bf62303571b3..3695cde669f8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
@@ -301,7 +301,7 @@ nvkm_therm_attr_set(struct nvkm_therm *therm,
301void 301void
302nvkm_therm_clkgate_enable(struct nvkm_therm *therm) 302nvkm_therm_clkgate_enable(struct nvkm_therm *therm)
303{ 303{
304 if (!therm->func->clkgate_enable || !therm->clkgating_enabled) 304 if (!therm || !therm->func->clkgate_enable || !therm->clkgating_enabled)
305 return; 305 return;
306 306
307 nvkm_debug(&therm->subdev, 307 nvkm_debug(&therm->subdev,
@@ -312,7 +312,7 @@ nvkm_therm_clkgate_enable(struct nvkm_therm *therm)
312void 312void
313nvkm_therm_clkgate_fini(struct nvkm_therm *therm, bool suspend) 313nvkm_therm_clkgate_fini(struct nvkm_therm *therm, bool suspend)
314{ 314{
315 if (!therm->func->clkgate_fini || !therm->clkgating_enabled) 315 if (!therm || !therm->func->clkgate_fini || !therm->clkgating_enabled)
316 return; 316 return;
317 317
318 nvkm_debug(&therm->subdev, 318 nvkm_debug(&therm->subdev,
@@ -395,7 +395,7 @@ void
395nvkm_therm_clkgate_init(struct nvkm_therm *therm, 395nvkm_therm_clkgate_init(struct nvkm_therm *therm,
396 const struct nvkm_therm_clkgate_pack *p) 396 const struct nvkm_therm_clkgate_pack *p)
397{ 397{
398 if (!therm->func->clkgate_init || !therm->clkgating_enabled) 398 if (!therm || !therm->func->clkgate_init || !therm->clkgating_enabled)
399 return; 399 return;
400 400
401 therm->func->clkgate_init(therm, p); 401 therm->func->clkgate_init(therm, p);
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 4bdbf77f7197..72c338eb5fae 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -269,13 +269,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
269 for (i = 0; i < ARRAY_SIZE(tjmax_model_table); i++) { 269 for (i = 0; i < ARRAY_SIZE(tjmax_model_table); i++) {
270 const struct tjmax_model *tm = &tjmax_model_table[i]; 270 const struct tjmax_model *tm = &tjmax_model_table[i];
271 if (c->x86_model == tm->model && 271 if (c->x86_model == tm->model &&
272 (tm->mask == ANY || c->x86_mask == tm->mask)) 272 (tm->mask == ANY || c->x86_stepping == tm->mask))
273 return tm->tjmax; 273 return tm->tjmax;
274 } 274 }
275 275
276 /* Early chips have no MSR for TjMax */ 276 /* Early chips have no MSR for TjMax */
277 277
278 if (c->x86_model == 0xf && c->x86_mask < 4) 278 if (c->x86_model == 0xf && c->x86_stepping < 4)
279 usemsr_ee = 0; 279 usemsr_ee = 0;
280 280
281 if (c->x86_model > 0xe && usemsr_ee) { 281 if (c->x86_model > 0xe && usemsr_ee) {
@@ -426,7 +426,7 @@ static int chk_ucode_version(unsigned int cpu)
426 * Readings might stop updating when the processor has entered a deep 426
427 * sleep state; fixed for stepping D0 (6EC). 427
428 */ 428 */
429 if (c->x86_model == 0xe && c->x86_mask < 0xc && c->microcode < 0x39) { 429 if (c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) {
430 pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n"); 430 pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n");
431 return -ENODEV; 431 return -ENODEV;
432 } 432 }
diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c
index ef91b8a67549..84e91286fc4f 100644
--- a/drivers/hwmon/hwmon-vid.c
+++ b/drivers/hwmon/hwmon-vid.c
@@ -293,7 +293,7 @@ u8 vid_which_vrm(void)
293 if (c->x86 < 6) /* Any CPU with family lower than 6 */ 293 if (c->x86 < 6) /* Any CPU with family lower than 6 */
294 return 0; /* doesn't have VID */ 294 return 0; /* doesn't have VID */
295 295
296 vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_mask, c->x86_vendor); 296 vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_stepping, c->x86_vendor);
297 if (vrm_ret == 134) 297 if (vrm_ret == 134)
298 vrm_ret = get_via_model_d_vrm(); 298 vrm_ret = get_via_model_d_vrm();
299 if (vrm_ret == 0) 299 if (vrm_ret == 0)
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 06b4e1c78bd8..051a72eecb24 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -129,7 +129,10 @@ static ssize_t temp1_input_show(struct device *dev,
129 129
130 data->read_tempreg(data->pdev, &regval); 130 data->read_tempreg(data->pdev, &regval);
131 temp = (regval >> 21) * 125; 131 temp = (regval >> 21) * 125;
132 temp -= data->temp_offset; 132 if (temp > data->temp_offset)
133 temp -= data->temp_offset;
134 else
135 temp = 0;
133 136
134 return sprintf(buf, "%u\n", temp); 137 return sprintf(buf, "%u\n", temp);
135} 138}
@@ -227,7 +230,7 @@ static bool has_erratum_319(struct pci_dev *pdev)
227 * and AM3 formats, but that's the best we can do. 230 * and AM3 formats, but that's the best we can do.
228 */ 231 */
229 return boot_cpu_data.x86_model < 4 || 232 return boot_cpu_data.x86_model < 4 ||
230 (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask <= 2); 233 (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2);
231} 234}
232 235
233static int k10temp_probe(struct pci_dev *pdev, 236static int k10temp_probe(struct pci_dev *pdev,
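
The k10temp change above guards an unsigned subtraction: on parts where the offset exceeds the raw reading, temp - offset would wrap to a huge positive value. A minimal sketch of the clamped conversion:

#include <stdint.h>

/*
 * regval >> 21 is the temperature in 0.125 degC steps; multiplying by
 * 125 yields millidegrees. Clamp at 0 instead of wrapping around.
 */
static uint32_t k10_temp_mdeg(uint32_t regval, uint32_t offset_mdeg)
{
        uint32_t temp = (regval >> 21) * 125;

        return temp > offset_mdeg ? temp - offset_mdeg : 0;
}
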
diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c
index 5a632bcf869b..e59f9113fb93 100644
--- a/drivers/hwmon/k8temp.c
+++ b/drivers/hwmon/k8temp.c
@@ -187,7 +187,7 @@ static int k8temp_probe(struct pci_dev *pdev,
187 return -ENOMEM; 187 return -ENOMEM;
188 188
189 model = boot_cpu_data.x86_model; 189 model = boot_cpu_data.x86_model;
190 stepping = boot_cpu_data.x86_mask; 190 stepping = boot_cpu_data.x86_stepping;
191 191
192 /* feature available since SH-C0, exclude older revisions */ 192 /* feature available since SH-C0, exclude older revisions */
193 if ((model == 4 && stepping == 0) || 193 if ((model == 4 && stepping == 0) ||
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
index de318389a301..67de94343cb4 100644
--- a/drivers/infiniband/hw/usnic/usnic_transport.c
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -174,14 +174,13 @@ void usnic_transport_put_socket(struct socket *sock)
174int usnic_transport_sock_get_addr(struct socket *sock, int *proto, 174int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
175 uint32_t *addr, uint16_t *port) 175 uint32_t *addr, uint16_t *port)
176{ 176{
177 int len;
178 int err; 177 int err;
179 struct sockaddr_in sock_addr; 178 struct sockaddr_in sock_addr;
180 179
181 err = sock->ops->getname(sock, 180 err = sock->ops->getname(sock,
182 (struct sockaddr *)&sock_addr, 181 (struct sockaddr *)&sock_addr,
183 &len, 0); 182 0);
184 if (err) 183 if (err < 0)
185 return err; 184 return err;
186 185
187 if (sock_addr.sin_family != AF_INET) 186 if (sock_addr.sin_family != AF_INET)
diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c
index 55cfb986225b..faf734ff4cf3 100644
--- a/drivers/irqchip/irq-bcm7038-l1.c
+++ b/drivers/irqchip/irq-bcm7038-l1.c
@@ -339,9 +339,6 @@ int __init bcm7038_l1_of_init(struct device_node *dn,
339 goto out_unmap; 339 goto out_unmap;
340 } 340 }
341 341
342 pr_info("registered BCM7038 L1 intc (mem: 0x%p, IRQs: %d)\n",
343 intc->cpus[0]->map_base, IRQS_PER_WORD * intc->n_words);
344
345 return 0; 342 return 0;
346 343
347out_unmap: 344out_unmap:
diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
index 983640eba418..8968e5e93fcb 100644
--- a/drivers/irqchip/irq-bcm7120-l2.c
+++ b/drivers/irqchip/irq-bcm7120-l2.c
@@ -318,9 +318,6 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn,
318 } 318 }
319 } 319 }
320 320
321 pr_info("registered %s intc (mem: 0x%p, parent IRQ(s): %d)\n",
322 intc_name, data->map_base[0], data->num_parent_irqs);
323
324 return 0; 321 return 0;
325 322
326out_free_domain: 323out_free_domain:
diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
index 691d20eb0bec..0e65f609352e 100644
--- a/drivers/irqchip/irq-brcmstb-l2.c
+++ b/drivers/irqchip/irq-brcmstb-l2.c
@@ -262,9 +262,6 @@ static int __init brcmstb_l2_intc_of_init(struct device_node *np,
262 ct->chip.irq_set_wake = irq_gc_set_wake; 262 ct->chip.irq_set_wake = irq_gc_set_wake;
263 } 263 }
264 264
265 pr_info("registered L2 intc (mem: 0x%p, parent irq: %d)\n",
266 base, parent_irq);
267
268 return 0; 265 return 0;
269 266
270out_free_domain: 267out_free_domain:
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index 993a8426a453..1ff38aff9f29 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -94,7 +94,7 @@ static struct irq_chip gicv2m_msi_irq_chip = {
94 94
95static struct msi_domain_info gicv2m_msi_domain_info = { 95static struct msi_domain_info gicv2m_msi_domain_info = {
96 .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | 96 .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
97 MSI_FLAG_PCI_MSIX), 97 MSI_FLAG_PCI_MSIX | MSI_FLAG_MULTI_PCI_MSI),
98 .chip = &gicv2m_msi_irq_chip, 98 .chip = &gicv2m_msi_irq_chip,
99}; 99};
100 100
@@ -155,18 +155,12 @@ static int gicv2m_irq_gic_domain_alloc(struct irq_domain *domain,
155 return 0; 155 return 0;
156} 156}
157 157
158static void gicv2m_unalloc_msi(struct v2m_data *v2m, unsigned int hwirq) 158static void gicv2m_unalloc_msi(struct v2m_data *v2m, unsigned int hwirq,
159 int nr_irqs)
159{ 160{
160 int pos;
161
162 pos = hwirq - v2m->spi_start;
163 if (pos < 0 || pos >= v2m->nr_spis) {
164 pr_err("Failed to teardown msi. Invalid hwirq %d\n", hwirq);
165 return;
166 }
167
168 spin_lock(&v2m_lock); 161 spin_lock(&v2m_lock);
169 __clear_bit(pos, v2m->bm); 162 bitmap_release_region(v2m->bm, hwirq - v2m->spi_start,
163 get_count_order(nr_irqs));
170 spin_unlock(&v2m_lock); 164 spin_unlock(&v2m_lock);
171} 165}
172 166
@@ -174,13 +168,13 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
174 unsigned int nr_irqs, void *args) 168 unsigned int nr_irqs, void *args)
175{ 169{
176 struct v2m_data *v2m = NULL, *tmp; 170 struct v2m_data *v2m = NULL, *tmp;
177 int hwirq, offset, err = 0; 171 int hwirq, offset, i, err = 0;
178 172
179 spin_lock(&v2m_lock); 173 spin_lock(&v2m_lock);
180 list_for_each_entry(tmp, &v2m_nodes, entry) { 174 list_for_each_entry(tmp, &v2m_nodes, entry) {
181 offset = find_first_zero_bit(tmp->bm, tmp->nr_spis); 175 offset = bitmap_find_free_region(tmp->bm, tmp->nr_spis,
182 if (offset < tmp->nr_spis) { 176 get_count_order(nr_irqs));
183 __set_bit(offset, tmp->bm); 177 if (offset >= 0) {
184 v2m = tmp; 178 v2m = tmp;
185 break; 179 break;
186 } 180 }
@@ -192,16 +186,21 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
192 186
193 hwirq = v2m->spi_start + offset; 187 hwirq = v2m->spi_start + offset;
194 188
195 err = gicv2m_irq_gic_domain_alloc(domain, virq, hwirq); 189 for (i = 0; i < nr_irqs; i++) {
196 if (err) { 190 err = gicv2m_irq_gic_domain_alloc(domain, virq + i, hwirq + i);
197 gicv2m_unalloc_msi(v2m, hwirq); 191 if (err)
198 return err; 192 goto fail;
199 }
200 193
201 irq_domain_set_hwirq_and_chip(domain, virq, hwirq, 194 irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
202 &gicv2m_irq_chip, v2m); 195 &gicv2m_irq_chip, v2m);
196 }
203 197
204 return 0; 198 return 0;
199
200fail:
201 irq_domain_free_irqs_parent(domain, virq, nr_irqs);
202 gicv2m_unalloc_msi(v2m, hwirq, nr_irqs);
203 return err;
205} 204}
206 205
207static void gicv2m_irq_domain_free(struct irq_domain *domain, 206static void gicv2m_irq_domain_free(struct irq_domain *domain,
@@ -210,8 +209,7 @@ static void gicv2m_irq_domain_free(struct irq_domain *domain,
210 struct irq_data *d = irq_domain_get_irq_data(domain, virq); 209 struct irq_data *d = irq_domain_get_irq_data(domain, virq);
211 struct v2m_data *v2m = irq_data_get_irq_chip_data(d); 210 struct v2m_data *v2m = irq_data_get_irq_chip_data(d);
212 211
213 BUG_ON(nr_irqs != 1); 212 gicv2m_unalloc_msi(v2m, d->hwirq, nr_irqs);
214 gicv2m_unalloc_msi(v2m, d->hwirq);
215 irq_domain_free_irqs_parent(domain, virq, nr_irqs); 213 irq_domain_free_irqs_parent(domain, virq, nr_irqs);
216} 214}
217 215
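
The switch from find_first_zero_bit() to bitmap_find_free_region() is what buys Multi-MSI: the region helpers hand out contiguous, power-of-two-sized, naturally aligned blocks, and both take an order rather than a count. A sketch of the order math, mirroring get_count_order() for n >= 1:

/*
 * 2^order is the smallest power of two >= n; a request for n MSIs
 * therefore reserves a 2^order block, and the matching release must
 * use the same order.
 */
static int count_order(unsigned int n)
{
        int order = 0;

        while ((1u << order) < n)
                order++;

        return order;
}

Because gicv2m_unalloc_msi() applies get_count_order() itself, callers pass it the raw nr_irqs on both the free and the error path.
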
diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
index 14a8c0a7e095..25a98de5cfb2 100644
--- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
@@ -132,6 +132,8 @@ static int __init its_pci_of_msi_init(void)
132 132
133 for (np = of_find_matching_node(NULL, its_device_id); np; 133 for (np = of_find_matching_node(NULL, its_device_id); np;
134 np = of_find_matching_node(np, its_device_id)) { 134 np = of_find_matching_node(np, its_device_id)) {
135 if (!of_device_is_available(np))
136 continue;
135 if (!of_property_read_bool(np, "msi-controller")) 137 if (!of_property_read_bool(np, "msi-controller"))
136 continue; 138 continue;
137 139
diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
index 833a90fe33ae..8881a053c173 100644
--- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
@@ -154,6 +154,8 @@ static void __init its_pmsi_of_init(void)
154 154
155 for (np = of_find_matching_node(NULL, its_device_id); np; 155 for (np = of_find_matching_node(NULL, its_device_id); np;
156 np = of_find_matching_node(np, its_device_id)) { 156 np = of_find_matching_node(np, its_device_id)) {
157 if (!of_device_is_available(np))
158 continue;
157 if (!of_property_read_bool(np, "msi-controller")) 159 if (!of_property_read_bool(np, "msi-controller"))
158 continue; 160 continue;
159 161
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 06f025fd5726..1d3056f53747 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3314,6 +3314,8 @@ static int __init its_of_probe(struct device_node *node)
3314 3314
3315 for (np = of_find_matching_node(node, its_device_id); np; 3315 for (np = of_find_matching_node(node, its_device_id); np;
3316 np = of_find_matching_node(np, its_device_id)) { 3316 np = of_find_matching_node(np, its_device_id)) {
3317 if (!of_device_is_available(np))
3318 continue;
3317 if (!of_property_read_bool(np, "msi-controller")) { 3319 if (!of_property_read_bool(np, "msi-controller")) {
3318 pr_warn("%pOF: no msi-controller property, ITS ignored\n", 3320 pr_warn("%pOF: no msi-controller property, ITS ignored\n",
3319 np); 3321 np);
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index a57c0fbbd34a..d99cc07903ec 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -673,7 +673,7 @@ static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq)
673 MPIDR_TO_SGI_RS(cluster_id) | 673 MPIDR_TO_SGI_RS(cluster_id) |
674 tlist << ICC_SGI1R_TARGET_LIST_SHIFT); 674 tlist << ICC_SGI1R_TARGET_LIST_SHIFT);
675 675
676 pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val); 676 pr_devel("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
677 gic_write_sgi1r(val); 677 gic_write_sgi1r(val);
678} 678}
679 679
@@ -688,7 +688,7 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
688 * Ensure that stores to Normal memory are visible to the 688 * Ensure that stores to Normal memory are visible to the
689 * other CPUs before issuing the IPI. 689 * other CPUs before issuing the IPI.
690 */ 690 */
691 smp_wmb(); 691 wmb();
692 692
693 for_each_cpu(cpu, mask) { 693 for_each_cpu(cpu, mask) {
694 u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu)); 694 u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu));
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index ef92a4d2038e..d32268cc1174 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -424,8 +424,6 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
424 spin_lock_irqsave(&gic_lock, flags); 424 spin_lock_irqsave(&gic_lock, flags);
425 write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin); 425 write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin);
426 write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu))); 426 write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu)));
427 gic_clear_pcpu_masks(intr);
428 set_bit(intr, per_cpu_ptr(pcpu_masks, cpu));
429 irq_data_update_effective_affinity(data, cpumask_of(cpu)); 427 irq_data_update_effective_affinity(data, cpumask_of(cpu));
430 spin_unlock_irqrestore(&gic_lock, flags); 428 spin_unlock_irqrestore(&gic_lock, flags);
431 429
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index c5603d1a07d6..1f8f489b4167 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -560,7 +560,7 @@ done:
560 560
561static int 561static int
562data_sock_getname(struct socket *sock, struct sockaddr *addr, 562data_sock_getname(struct socket *sock, struct sockaddr *addr,
563 int *addr_len, int peer) 563 int peer)
564{ 564{
565 struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr; 565 struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr;
566 struct sock *sk = sock->sk; 566 struct sock *sk = sock->sk;
@@ -570,14 +570,13 @@ data_sock_getname(struct socket *sock, struct sockaddr *addr,
570 570
571 lock_sock(sk); 571 lock_sock(sk);
572 572
573 *addr_len = sizeof(*maddr);
574 maddr->family = AF_ISDN; 573 maddr->family = AF_ISDN;
575 maddr->dev = _pms(sk)->dev->id; 574 maddr->dev = _pms(sk)->dev->id;
576 maddr->channel = _pms(sk)->ch.nr; 575 maddr->channel = _pms(sk)->ch.nr;
577 maddr->sapi = _pms(sk)->ch.addr & 0xff; 576 maddr->sapi = _pms(sk)->ch.addr & 0xff;
578 maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xff; 577 maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xff;
579 release_sock(sk); 578 release_sock(sk);
580 return 0; 579 return sizeof(*maddr);
581} 580}
582 581
583static const struct proto_ops data_sock_ops = { 582static const struct proto_ops data_sock_ops = {
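
Both getname() conversions in this series follow the same kernel API change: the int *len out-parameter is gone, and the op returns either a negative errno or the length of the address it wrote. A compact sketch of the new contract with a made-up address type:

#include <errno.h>
#include <string.h>

struct toy_addr {
        unsigned short family;
        unsigned char  data[14];
};

/*
 * New-style getname: fill the buffer and return its length, or a
 * negative errno on failure. No *len out-parameter anymore.
 */
static int toy_getname(struct toy_addr *addr, int connected)
{
        if (!connected)
                return -ENOTCONN;

        memset(addr, 0, sizeof(*addr));
        addr->family = 1;
        return sizeof(*addr);
}

/* Callers now test err < 0; a positive return is the address length. */
static int toy_caller(void)
{
        struct toy_addr sa;
        int len = toy_getname(&sa, 1);

        return len < 0 ? len : 0;
}
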
diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
index 62f541f968f6..07074820a167 100644
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -375,6 +375,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
375 dev->ofdev.dev.of_node = np; 375 dev->ofdev.dev.of_node = np;
376 dev->ofdev.archdata.dma_mask = 0xffffffffUL; 376 dev->ofdev.archdata.dma_mask = 0xffffffffUL;
377 dev->ofdev.dev.dma_mask = &dev->ofdev.archdata.dma_mask; 377 dev->ofdev.dev.dma_mask = &dev->ofdev.archdata.dma_mask;
378 dev->ofdev.dev.coherent_dma_mask = dev->ofdev.archdata.dma_mask;
378 dev->ofdev.dev.parent = parent; 379 dev->ofdev.dev.parent = parent;
379 dev->ofdev.dev.bus = &macio_bus_type; 380 dev->ofdev.dev.bus = &macio_bus_type;
380 dev->ofdev.dev.release = macio_release_dev; 381 dev->ofdev.dev.release = macio_release_dev;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d6de00f367ef..68136806d365 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -903,7 +903,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
903 queue_io(md, bio); 903 queue_io(md, bio);
904 } else { 904 } else {
905 /* done with normal IO or empty flush */ 905 /* done with normal IO or empty flush */
906 bio->bi_status = io_error; 906 if (io_error)
907 bio->bi_status = io_error;
907 bio_endio(bio); 908 bio_endio(bio);
908 } 909 }
909 } 910 }
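
The dm change above makes error propagation sticky: an error already recorded in the bio must not be cleared by a later successful completion. A one-function sketch of that rule with a stand-in status type:

typedef unsigned char blk_status_t;     /* stand-in for the kernel typedef */

/* Keep the first error: only a non-zero status is ever written back. */
static void propagate_status(blk_status_t *bi_status, blk_status_t io_error)
{
        if (io_error)
                *bi_status = io_error;
}
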
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index d9aa407db06a..2dd2db9bc1c9 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -277,7 +277,7 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
277 struct ocxl_context *ctx = file->private_data; 277 struct ocxl_context *ctx = file->private_data;
278 struct ocxl_kernel_event_header header; 278 struct ocxl_kernel_event_header header;
279 ssize_t rc; 279 ssize_t rc;
280 size_t used = 0; 280 ssize_t used = 0;
281 DEFINE_WAIT(event_wait); 281 DEFINE_WAIT(event_wait);
282 282
283 memset(&header, 0, sizeof(header)); 283 memset(&header, 0, sizeof(header));
diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c
index 229dc18f0581..768972af8b85 100644
--- a/drivers/mmc/host/bcm2835.c
+++ b/drivers/mmc/host/bcm2835.c
@@ -1265,7 +1265,8 @@ static int bcm2835_add_host(struct bcm2835_host *host)
1265 char pio_limit_string[20]; 1265 char pio_limit_string[20];
1266 int ret; 1266 int ret;
1267 1267
1268 mmc->f_max = host->max_clk; 1268 if (!mmc->f_max || mmc->f_max > host->max_clk)
1269 mmc->f_max = host->max_clk;
1269 mmc->f_min = host->max_clk / SDCDIV_MAX_CDIV; 1270 mmc->f_min = host->max_clk / SDCDIV_MAX_CDIV;
1270 1271
1271 mmc->max_busy_timeout = ~0 / (mmc->f_max / 1000); 1272 mmc->max_busy_timeout = ~0 / (mmc->f_max / 1000);
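
The bcm2835 fix above stops the driver from clobbering an f_max that was already constrained (for example by the device tree); the controller clock only caps it. A one-function sketch of that rule:

/*
 * Honour an existing cap unless it is unset (0) or faster than the
 * controller clock can actually deliver.
 */
static unsigned int clamp_f_max(unsigned int f_max, unsigned int max_clk)
{
        if (!f_max || f_max > max_clk)
                return max_clk;

        return f_max;
}
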
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 22438ebfe4e6..4f972b879fe6 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -717,22 +717,6 @@ static int meson_mmc_clk_phase_tuning(struct mmc_host *mmc, u32 opcode,
717static int meson_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode) 717static int meson_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode)
718{ 718{
719 struct meson_host *host = mmc_priv(mmc); 719 struct meson_host *host = mmc_priv(mmc);
720 int ret;
721
722 /*
723 * If this is the initial tuning, try to get a sane Rx starting
724 * phase before doing the actual tuning.
725 */
726 if (!mmc->doing_retune) {
727 ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk);
728
729 if (ret)
730 return ret;
731 }
732
733 ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->tx_clk);
734 if (ret)
735 return ret;
736 720
737 return meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk); 721 return meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk);
738} 722}
@@ -763,9 +747,8 @@ static void meson_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
763 if (!IS_ERR(mmc->supply.vmmc)) 747 if (!IS_ERR(mmc->supply.vmmc))
764 mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd); 748 mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd);
765 749
766 /* Reset phases */ 750 /* Reset rx phase */
767 clk_set_phase(host->rx_clk, 0); 751 clk_set_phase(host->rx_clk, 0);
768 clk_set_phase(host->tx_clk, 270);
769 752
770 break; 753 break;
771 754
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index e6b8c59f2c0d..736ac887303c 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -328,7 +328,7 @@ config MTD_NAND_MARVELL
328 tristate "NAND controller support on Marvell boards" 328 tristate "NAND controller support on Marvell boards"
329 depends on PXA3xx || ARCH_MMP || PLAT_ORION || ARCH_MVEBU || \ 329 depends on PXA3xx || ARCH_MMP || PLAT_ORION || ARCH_MVEBU || \
330 COMPILE_TEST 330 COMPILE_TEST
331 depends on HAS_IOMEM 331 depends on HAS_IOMEM && HAS_DMA
332 help 332 help
333 This enables the NAND flash controller driver for Marvell boards, 333 This enables the NAND flash controller driver for Marvell boards,
334 including: 334 including:
diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c
index 80d31a58e558..f367144f3c6f 100644
--- a/drivers/mtd/nand/vf610_nfc.c
+++ b/drivers/mtd/nand/vf610_nfc.c
@@ -752,10 +752,8 @@ static int vf610_nfc_probe(struct platform_device *pdev)
752 if (mtd->oobsize > 64) 752 if (mtd->oobsize > 64)
753 mtd->oobsize = 64; 753 mtd->oobsize = 64;
754 754
755 /* 755 /* Use default large page ECC layout defined in NAND core */
756 * mtd->ecclayout is not specified here because we're using the 756 mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
757 * default large page ECC layout defined in NAND core.
758 */
759 if (chip->ecc.strength == 32) { 757 if (chip->ecc.strength == 32) {
760 nfc->ecc_mode = ECC_60_BYTE; 758 nfc->ecc_mode = ECC_60_BYTE;
761 chip->ecc.bytes = 60; 759 chip->ecc.bytes = 60;
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 944ec3c9282c..d88b78a17440 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,9 +149,8 @@ config MACVTAP
149config IPVLAN 149config IPVLAN
150 tristate "IP-VLAN support" 150 tristate "IP-VLAN support"
151 depends on INET 151 depends on INET
152 depends on IPV6
153 depends on NETFILTER 152 depends on NETFILTER
154 depends on NET_L3_MASTER_DEV 153 select NET_L3_MASTER_DEV
155 ---help--- 154 ---help---
156 This allows one to create virtual devices off of a main interface 155 This allows one to create virtual devices off of a main interface
157 and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) 156 and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)
diff --git a/drivers/net/Space.c b/drivers/net/Space.c
index 11fe71278f40..64333ec999ac 100644
--- a/drivers/net/Space.c
+++ b/drivers/net/Space.c
@@ -114,9 +114,6 @@ static struct devprobe2 m68k_probes[] __initdata = {
114#ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */ 114#ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */
115 {mvme147lance_probe, 0}, 115 {mvme147lance_probe, 0},
116#endif 116#endif
117#ifdef CONFIG_MAC8390 /* NuBus NS8390-based cards */
118 {mac8390_probe, 0},
119#endif
120#ifdef CONFIG_MAC89x0 117#ifdef CONFIG_MAC89x0
121 {mac89x0_probe, 0}, 118 {mac89x0_probe, 0},
122#endif 119#endif
diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig
index 1aaa7a95ebc4..ae9e7f7cb31c 100644
--- a/drivers/net/dsa/mv88e6xxx/Kconfig
+++ b/drivers/net/dsa/mv88e6xxx/Kconfig
@@ -18,3 +18,13 @@ config NET_DSA_MV88E6XXX_GLOBAL2
18 18
19 It is required on most chips. If the chip you compile the support for 19 It is required on most chips. If the chip you compile the support for
20 doesn't have such registers set, say N here. In doubt, say Y. 20 doesn't have such registers set, say N here. In doubt, say Y.
21
22config NET_DSA_MV88E6XXX_PTP
23 bool "PTP support for Marvell 88E6xxx"
24 default n
25 depends on NET_DSA_MV88E6XXX_GLOBAL2
26 imply NETWORK_PHY_TIMESTAMPING
27 imply PTP_1588_CLOCK
28 help
29 Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch
30 chips that support it.
diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile
index 58a4a0014e59..50de304abe2f 100644
--- a/drivers/net/dsa/mv88e6xxx/Makefile
+++ b/drivers/net/dsa/mv88e6xxx/Makefile
@@ -5,6 +5,10 @@ mv88e6xxx-objs += global1.o
5mv88e6xxx-objs += global1_atu.o 5mv88e6xxx-objs += global1_atu.o
6mv88e6xxx-objs += global1_vtu.o 6mv88e6xxx-objs += global1_vtu.o
7mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o 7mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
8mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_avb.o
9mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_scratch.o
10mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += hwtstamp.o
8mv88e6xxx-objs += phy.o 11mv88e6xxx-objs += phy.o
9mv88e6xxx-objs += port.o 12mv88e6xxx-objs += port.o
13mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o
10mv88e6xxx-objs += serdes.o 14mv88e6xxx-objs += serdes.o
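
With hwtstamp.o and ptp.o built only when CONFIG_NET_DSA_MV88E6XXX_PTP is set, the usual companion (an assumption here; the headers are not part of this diff) is a pair of declarations so chip.c links either way:

struct mv88e6xxx_chip;

#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip);
#else
/* Feature compiled out: a no-op stub keeps the call site unconditional. */
static inline int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
{
        return 0;
}
#endif
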
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index eb328bade225..39c7ad7e490f 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -36,8 +36,10 @@
36#include "chip.h" 36#include "chip.h"
37#include "global1.h" 37#include "global1.h"
38#include "global2.h" 38#include "global2.h"
39#include "hwtstamp.h"
39#include "phy.h" 40#include "phy.h"
40#include "port.h" 41#include "port.h"
42#include "ptp.h"
41#include "serdes.h" 43#include "serdes.h"
42 44
43static void assert_reg_lock(struct mv88e6xxx_chip *chip) 45static void assert_reg_lock(struct mv88e6xxx_chip *chip)
@@ -712,9 +714,12 @@ static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
712 for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) { 714 for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) {
713 stat = &mv88e6xxx_hw_stats[i]; 715 stat = &mv88e6xxx_hw_stats[i];
714 if (stat->type & types) { 716 if (stat->type & types) {
717 mutex_lock(&chip->reg_lock);
715 data[j] = _mv88e6xxx_get_ethtool_stat(chip, stat, port, 718 data[j] = _mv88e6xxx_get_ethtool_stat(chip, stat, port,
716 bank1_select, 719 bank1_select,
717 histogram); 720 histogram);
721 mutex_unlock(&chip->reg_lock);
722
718 j++; 723 j++;
719 } 724 }
720 } 725 }
@@ -762,14 +767,13 @@ static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
762 mutex_lock(&chip->reg_lock); 767 mutex_lock(&chip->reg_lock);
763 768
764 ret = mv88e6xxx_stats_snapshot(chip, port); 769 ret = mv88e6xxx_stats_snapshot(chip, port);
765 if (ret < 0) { 770 mutex_unlock(&chip->reg_lock);
766 mutex_unlock(&chip->reg_lock); 771
772 if (ret < 0)
767 return; 773 return;
768 }
769 774
770 mv88e6xxx_get_stats(chip, port, data); 775 mv88e6xxx_get_stats(chip, port, data);
771 776
772 mutex_unlock(&chip->reg_lock);
773} 777}
774 778
775static int mv88e6xxx_stats_set_histogram(struct mv88e6xxx_chip *chip) 779static int mv88e6xxx_stats_set_histogram(struct mv88e6xxx_chip *chip)
@@ -1433,7 +1437,9 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
1433 eth_broadcast_addr(addr.mac); 1437 eth_broadcast_addr(addr.mac);
1434 1438
1435 do { 1439 do {
1440 mutex_lock(&chip->reg_lock);
1436 err = mv88e6xxx_g1_atu_getnext(chip, fid, &addr); 1441 err = mv88e6xxx_g1_atu_getnext(chip, fid, &addr);
1442 mutex_unlock(&chip->reg_lock);
1437 if (err) 1443 if (err)
1438 return err; 1444 return err;
1439 1445
@@ -1466,7 +1472,10 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
1466 int err; 1472 int err;
1467 1473
1468 /* Dump port's default Filtering Information Database (VLAN ID 0) */ 1474 /* Dump port's default Filtering Information Database (VLAN ID 0) */
1475 mutex_lock(&chip->reg_lock);
1469 err = mv88e6xxx_port_get_fid(chip, port, &fid); 1476 err = mv88e6xxx_port_get_fid(chip, port, &fid);
1477 mutex_unlock(&chip->reg_lock);
1478
1470 if (err) 1479 if (err)
1471 return err; 1480 return err;
1472 1481
@@ -1476,7 +1485,9 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
1476 1485
1477 /* Dump VLANs' Filtering Information Databases */ 1486 /* Dump VLANs' Filtering Information Databases */
1478 do { 1487 do {
1488 mutex_lock(&chip->reg_lock);
1479 err = mv88e6xxx_vtu_getnext(chip, &vlan); 1489 err = mv88e6xxx_vtu_getnext(chip, &vlan);
1490 mutex_unlock(&chip->reg_lock);
1480 if (err) 1491 if (err)
1481 return err; 1492 return err;
1482 1493
@@ -1496,13 +1507,8 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
1496 dsa_fdb_dump_cb_t *cb, void *data) 1507 dsa_fdb_dump_cb_t *cb, void *data)
1497{ 1508{
1498 struct mv88e6xxx_chip *chip = ds->priv; 1509 struct mv88e6xxx_chip *chip = ds->priv;
1499 int err;
1500
1501 mutex_lock(&chip->reg_lock);
1502 err = mv88e6xxx_port_db_dump(chip, port, cb, data);
1503 mutex_unlock(&chip->reg_lock);
1504 1510
1505 return err; 1511 return mv88e6xxx_port_db_dump(chip, port, cb, data);
1506} 1512}
1507 1513
1508static int mv88e6xxx_bridge_map(struct mv88e6xxx_chip *chip, 1514static int mv88e6xxx_bridge_map(struct mv88e6xxx_chip *chip,
@@ -2092,6 +2098,17 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
2092 if (err) 2098 if (err)
2093 goto unlock; 2099 goto unlock;
2094 2100
2101 /* Setup PTP Hardware Clock and timestamping */
2102 if (chip->info->ptp_support) {
2103 err = mv88e6xxx_ptp_setup(chip);
2104 if (err)
2105 goto unlock;
2106
2107 err = mv88e6xxx_hwtstamp_setup(chip);
2108 if (err)
2109 goto unlock;
2110 }
2111
2095unlock: 2112unlock:
2096 mutex_unlock(&chip->reg_lock); 2113 mutex_unlock(&chip->reg_lock);
2097 2114
@@ -2472,6 +2489,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
2472 .reset = mv88e6352_g1_reset, 2489 .reset = mv88e6352_g1_reset,
2473 .vtu_getnext = mv88e6352_g1_vtu_getnext, 2490 .vtu_getnext = mv88e6352_g1_vtu_getnext,
2474 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, 2491 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
2492 .gpio_ops = &mv88e6352_gpio_ops,
2475}; 2493};
2476 2494
2477static const struct mv88e6xxx_ops mv88e6161_ops = { 2495static const struct mv88e6xxx_ops mv88e6161_ops = {
@@ -2602,6 +2620,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
2602 .vtu_getnext = mv88e6352_g1_vtu_getnext, 2620 .vtu_getnext = mv88e6352_g1_vtu_getnext,
2603 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, 2621 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
2604 .serdes_power = mv88e6352_serdes_power, 2622 .serdes_power = mv88e6352_serdes_power,
2623 .gpio_ops = &mv88e6352_gpio_ops,
2605}; 2624};
2606 2625
2607static const struct mv88e6xxx_ops mv88e6175_ops = { 2626static const struct mv88e6xxx_ops mv88e6175_ops = {
@@ -2673,6 +2692,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
2673 .vtu_getnext = mv88e6352_g1_vtu_getnext, 2692 .vtu_getnext = mv88e6352_g1_vtu_getnext,
2674 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, 2693 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
2675 .serdes_power = mv88e6352_serdes_power, 2694 .serdes_power = mv88e6352_serdes_power,
2695 .gpio_ops = &mv88e6352_gpio_ops,
2676}; 2696};
2677 2697
2678static const struct mv88e6xxx_ops mv88e6185_ops = { 2698static const struct mv88e6xxx_ops mv88e6185_ops = {
@@ -2736,6 +2756,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
2736 .vtu_getnext = mv88e6390_g1_vtu_getnext, 2756 .vtu_getnext = mv88e6390_g1_vtu_getnext,
2737 .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, 2757 .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
2738 .serdes_power = mv88e6390_serdes_power, 2758 .serdes_power = mv88e6390_serdes_power,
2759 .gpio_ops = &mv88e6352_gpio_ops,
2739}; 2760};
2740 2761
2741static const struct mv88e6xxx_ops mv88e6190x_ops = { 2762static const struct mv88e6xxx_ops mv88e6190x_ops = {
@@ -2771,6 +2792,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
2771 .vtu_getnext = mv88e6390_g1_vtu_getnext, 2792 .vtu_getnext = mv88e6390_g1_vtu_getnext,
2772 .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, 2793 .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
2773 .serdes_power = mv88e6390_serdes_power, 2794 .serdes_power = mv88e6390_serdes_power,
2795 .gpio_ops = &mv88e6352_gpio_ops,
2774}; 2796};
2775 2797
2776static const struct mv88e6xxx_ops mv88e6191_ops = { 2798static const struct mv88e6xxx_ops mv88e6191_ops = {
@@ -2843,6 +2865,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
2843 .vtu_getnext = mv88e6352_g1_vtu_getnext, 2865 .vtu_getnext = mv88e6352_g1_vtu_getnext,
2844 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, 2866 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
2845 .serdes_power = mv88e6352_serdes_power, 2867 .serdes_power = mv88e6352_serdes_power,
2868 .gpio_ops = &mv88e6352_gpio_ops,
2869 .avb_ops = &mv88e6352_avb_ops,
2846}; 2870};
2847 2871
2848static const struct mv88e6xxx_ops mv88e6290_ops = { 2872static const struct mv88e6xxx_ops mv88e6290_ops = {
@@ -2879,6 +2903,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
2879 .vtu_getnext = mv88e6390_g1_vtu_getnext, 2903 .vtu_getnext = mv88e6390_g1_vtu_getnext,
2880 .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, 2904 .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
2881 .serdes_power = mv88e6390_serdes_power, 2905 .serdes_power = mv88e6390_serdes_power,
2906 .gpio_ops = &mv88e6352_gpio_ops,
2907 .avb_ops = &mv88e6390_avb_ops,
2882}; 2908};
2883 2909
2884static const struct mv88e6xxx_ops mv88e6320_ops = { 2910static const struct mv88e6xxx_ops mv88e6320_ops = {
@@ -2913,6 +2939,8 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
2913 .reset = mv88e6352_g1_reset, 2939 .reset = mv88e6352_g1_reset,
2914 .vtu_getnext = mv88e6185_g1_vtu_getnext, 2940 .vtu_getnext = mv88e6185_g1_vtu_getnext,
2915 .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, 2941 .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
2942 .gpio_ops = &mv88e6352_gpio_ops,
2943 .avb_ops = &mv88e6352_avb_ops,
2916}; 2944};
2917 2945
2918static const struct mv88e6xxx_ops mv88e6321_ops = { 2946static const struct mv88e6xxx_ops mv88e6321_ops = {
@@ -2945,6 +2973,8 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
2945 .reset = mv88e6352_g1_reset, 2973 .reset = mv88e6352_g1_reset,
2946 .vtu_getnext = mv88e6185_g1_vtu_getnext, 2974 .vtu_getnext = mv88e6185_g1_vtu_getnext,
2947 .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, 2975 .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
2976 .gpio_ops = &mv88e6352_gpio_ops,
2977 .avb_ops = &mv88e6352_avb_ops,
2948}; 2978};
2949 2979
2950static const struct mv88e6xxx_ops mv88e6341_ops = { 2980static const struct mv88e6xxx_ops mv88e6341_ops = {
@@ -2981,6 +3011,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
2981 .reset = mv88e6352_g1_reset, 3011 .reset = mv88e6352_g1_reset,
2982 .vtu_getnext = mv88e6352_g1_vtu_getnext, 3012 .vtu_getnext = mv88e6352_g1_vtu_getnext,
2983 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, 3013 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
3014 .gpio_ops = &mv88e6352_gpio_ops,
3015 .avb_ops = &mv88e6390_avb_ops,
2984}; 3016};
2985 3017
2986static const struct mv88e6xxx_ops mv88e6350_ops = { 3018static const struct mv88e6xxx_ops mv88e6350_ops = {
@@ -3049,6 +3081,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
3049 .reset = mv88e6352_g1_reset, 3081 .reset = mv88e6352_g1_reset,
3050 .vtu_getnext = mv88e6352_g1_vtu_getnext, 3082 .vtu_getnext = mv88e6352_g1_vtu_getnext,
3051 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, 3083 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
3084 .avb_ops = &mv88e6352_avb_ops,
3052}; 3085};
3053 3086
3054static const struct mv88e6xxx_ops mv88e6352_ops = { 3087static const struct mv88e6xxx_ops mv88e6352_ops = {
@@ -3086,6 +3119,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
3086 .vtu_getnext = mv88e6352_g1_vtu_getnext, 3119 .vtu_getnext = mv88e6352_g1_vtu_getnext,
3087 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, 3120 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
3088 .serdes_power = mv88e6352_serdes_power, 3121 .serdes_power = mv88e6352_serdes_power,
3122 .gpio_ops = &mv88e6352_gpio_ops,
3123 .avb_ops = &mv88e6352_avb_ops,
3089}; 3124};
3090 3125
3091static const struct mv88e6xxx_ops mv88e6390_ops = { 3126static const struct mv88e6xxx_ops mv88e6390_ops = {
@@ -3124,6 +3159,8 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
3124 .vtu_getnext = mv88e6390_g1_vtu_getnext, 3159 .vtu_getnext = mv88e6390_g1_vtu_getnext,
3125 .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, 3160 .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
3126 .serdes_power = mv88e6390_serdes_power, 3161 .serdes_power = mv88e6390_serdes_power,
3162 .gpio_ops = &mv88e6352_gpio_ops,
3163 .avb_ops = &mv88e6390_avb_ops,
3127}; 3164};
3128 3165
3129static const struct mv88e6xxx_ops mv88e6390x_ops = { 3166static const struct mv88e6xxx_ops mv88e6390x_ops = {
@@ -3162,6 +3199,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
3162 .vtu_getnext = mv88e6390_g1_vtu_getnext, 3199 .vtu_getnext = mv88e6390_g1_vtu_getnext,
3163 .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, 3200 .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
3164 .serdes_power = mv88e6390_serdes_power, 3201 .serdes_power = mv88e6390_serdes_power,
3202 .gpio_ops = &mv88e6352_gpio_ops,
3203 .avb_ops = &mv88e6390_avb_ops,
3165}; 3204};
3166 3205
3167static const struct mv88e6xxx_info mv88e6xxx_table[] = { 3206static const struct mv88e6xxx_info mv88e6xxx_table[] = {
@@ -3267,6 +3306,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3267 .name = "Marvell 88E6341", 3306 .name = "Marvell 88E6341",
3268 .num_databases = 4096, 3307 .num_databases = 4096,
3269 .num_ports = 6, 3308 .num_ports = 6,
3309 .num_gpio = 11,
3270 .max_vid = 4095, 3310 .max_vid = 4095,
3271 .port_base_addr = 0x10, 3311 .port_base_addr = 0x10,
3272 .global1_addr = 0x1b, 3312 .global1_addr = 0x1b,
@@ -3346,6 +3386,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3346 .name = "Marvell 88E6172", 3386 .name = "Marvell 88E6172",
3347 .num_databases = 4096, 3387 .num_databases = 4096,
3348 .num_ports = 7, 3388 .num_ports = 7,
3389 .num_gpio = 15,
3349 .max_vid = 4095, 3390 .max_vid = 4095,
3350 .port_base_addr = 0x10, 3391 .port_base_addr = 0x10,
3351 .global1_addr = 0x1b, 3392 .global1_addr = 0x1b,
@@ -3386,6 +3427,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3386 .name = "Marvell 88E6176", 3427 .name = "Marvell 88E6176",
3387 .num_databases = 4096, 3428 .num_databases = 4096,
3388 .num_ports = 7, 3429 .num_ports = 7,
3430 .num_gpio = 15,
3389 .max_vid = 4095, 3431 .max_vid = 4095,
3390 .port_base_addr = 0x10, 3432 .port_base_addr = 0x10,
3391 .global1_addr = 0x1b, 3433 .global1_addr = 0x1b,
@@ -3424,6 +3466,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3424 .name = "Marvell 88E6190", 3466 .name = "Marvell 88E6190",
3425 .num_databases = 4096, 3467 .num_databases = 4096,
3426 .num_ports = 11, /* 10 + Z80 */ 3468 .num_ports = 11, /* 10 + Z80 */
3469 .num_gpio = 16,
3427 .max_vid = 8191, 3470 .max_vid = 8191,
3428 .port_base_addr = 0x0, 3471 .port_base_addr = 0x0,
3429 .global1_addr = 0x1b, 3472 .global1_addr = 0x1b,
@@ -3444,6 +3487,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3444 .name = "Marvell 88E6190X", 3487 .name = "Marvell 88E6190X",
3445 .num_databases = 4096, 3488 .num_databases = 4096,
3446 .num_ports = 11, /* 10 + Z80 */ 3489 .num_ports = 11, /* 10 + Z80 */
3490 .num_gpio = 16,
3447 .max_vid = 8191, 3491 .max_vid = 8191,
3448 .port_base_addr = 0x0, 3492 .port_base_addr = 0x0,
3449 .global1_addr = 0x1b, 3493 .global1_addr = 0x1b,
@@ -3475,6 +3519,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3475 .pvt = true, 3519 .pvt = true,
3476 .multi_chip = true, 3520 .multi_chip = true,
3477 .tag_protocol = DSA_TAG_PROTO_DSA, 3521 .tag_protocol = DSA_TAG_PROTO_DSA,
3522 .ptp_support = true,
3478 .ops = &mv88e6191_ops, 3523 .ops = &mv88e6191_ops,
3479 }, 3524 },
3480 3525
@@ -3484,6 +3529,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3484 .name = "Marvell 88E6240", 3529 .name = "Marvell 88E6240",
3485 .num_databases = 4096, 3530 .num_databases = 4096,
3486 .num_ports = 7, 3531 .num_ports = 7,
3532 .num_gpio = 15,
3487 .max_vid = 4095, 3533 .max_vid = 4095,
3488 .port_base_addr = 0x10, 3534 .port_base_addr = 0x10,
3489 .global1_addr = 0x1b, 3535 .global1_addr = 0x1b,
@@ -3495,6 +3541,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3495 .pvt = true, 3541 .pvt = true,
3496 .multi_chip = true, 3542 .multi_chip = true,
3497 .tag_protocol = DSA_TAG_PROTO_EDSA, 3543 .tag_protocol = DSA_TAG_PROTO_EDSA,
3544 .ptp_support = true,
3498 .ops = &mv88e6240_ops, 3545 .ops = &mv88e6240_ops,
3499 }, 3546 },
3500 3547
@@ -3504,6 +3551,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3504 .name = "Marvell 88E6290", 3551 .name = "Marvell 88E6290",
3505 .num_databases = 4096, 3552 .num_databases = 4096,
3506 .num_ports = 11, /* 10 + Z80 */ 3553 .num_ports = 11, /* 10 + Z80 */
3554 .num_gpio = 16,
3507 .max_vid = 8191, 3555 .max_vid = 8191,
3508 .port_base_addr = 0x0, 3556 .port_base_addr = 0x0,
3509 .global1_addr = 0x1b, 3557 .global1_addr = 0x1b,
@@ -3515,6 +3563,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3515 .pvt = true, 3563 .pvt = true,
3516 .multi_chip = true, 3564 .multi_chip = true,
3517 .tag_protocol = DSA_TAG_PROTO_DSA, 3565 .tag_protocol = DSA_TAG_PROTO_DSA,
3566 .ptp_support = true,
3518 .ops = &mv88e6290_ops, 3567 .ops = &mv88e6290_ops,
3519 }, 3568 },
3520 3569
@@ -3524,6 +3573,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3524 .name = "Marvell 88E6320", 3573 .name = "Marvell 88E6320",
3525 .num_databases = 4096, 3574 .num_databases = 4096,
3526 .num_ports = 7, 3575 .num_ports = 7,
3576 .num_gpio = 15,
3527 .max_vid = 4095, 3577 .max_vid = 4095,
3528 .port_base_addr = 0x10, 3578 .port_base_addr = 0x10,
3529 .global1_addr = 0x1b, 3579 .global1_addr = 0x1b,
@@ -3534,6 +3584,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3534 .pvt = true, 3584 .pvt = true,
3535 .multi_chip = true, 3585 .multi_chip = true,
3536 .tag_protocol = DSA_TAG_PROTO_EDSA, 3586 .tag_protocol = DSA_TAG_PROTO_EDSA,
3587 .ptp_support = true,
3537 .ops = &mv88e6320_ops, 3588 .ops = &mv88e6320_ops,
3538 }, 3589 },
3539 3590
@@ -3543,6 +3594,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3543 .name = "Marvell 88E6321", 3594 .name = "Marvell 88E6321",
3544 .num_databases = 4096, 3595 .num_databases = 4096,
3545 .num_ports = 7, 3596 .num_ports = 7,
3597 .num_gpio = 15,
3546 .max_vid = 4095, 3598 .max_vid = 4095,
3547 .port_base_addr = 0x10, 3599 .port_base_addr = 0x10,
3548 .global1_addr = 0x1b, 3600 .global1_addr = 0x1b,
@@ -3552,6 +3604,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3552 .atu_move_port_mask = 0xf, 3604 .atu_move_port_mask = 0xf,
3553 .multi_chip = true, 3605 .multi_chip = true,
3554 .tag_protocol = DSA_TAG_PROTO_EDSA, 3606 .tag_protocol = DSA_TAG_PROTO_EDSA,
3607 .ptp_support = true,
3555 .ops = &mv88e6321_ops, 3608 .ops = &mv88e6321_ops,
3556 }, 3609 },
3557 3610
@@ -3561,6 +3614,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3561 .name = "Marvell 88E6341", 3614 .name = "Marvell 88E6341",
3562 .num_databases = 4096, 3615 .num_databases = 4096,
3563 .num_ports = 6, 3616 .num_ports = 6,
3617 .num_gpio = 11,
3564 .max_vid = 4095, 3618 .max_vid = 4095,
3565 .port_base_addr = 0x10, 3619 .port_base_addr = 0x10,
3566 .global1_addr = 0x1b, 3620 .global1_addr = 0x1b,
@@ -3571,6 +3625,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3571 .pvt = true, 3625 .pvt = true,
3572 .multi_chip = true, 3626 .multi_chip = true,
3573 .tag_protocol = DSA_TAG_PROTO_EDSA, 3627 .tag_protocol = DSA_TAG_PROTO_EDSA,
3628 .ptp_support = true,
3574 .ops = &mv88e6341_ops, 3629 .ops = &mv88e6341_ops,
3575 }, 3630 },
3576 3631
@@ -3620,6 +3675,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3620 .name = "Marvell 88E6352", 3675 .name = "Marvell 88E6352",
3621 .num_databases = 4096, 3676 .num_databases = 4096,
3622 .num_ports = 7, 3677 .num_ports = 7,
3678 .num_gpio = 15,
3623 .max_vid = 4095, 3679 .max_vid = 4095,
3624 .port_base_addr = 0x10, 3680 .port_base_addr = 0x10,
3625 .global1_addr = 0x1b, 3681 .global1_addr = 0x1b,
@@ -3631,6 +3687,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3631 .pvt = true, 3687 .pvt = true,
3632 .multi_chip = true, 3688 .multi_chip = true,
3633 .tag_protocol = DSA_TAG_PROTO_EDSA, 3689 .tag_protocol = DSA_TAG_PROTO_EDSA,
3690 .ptp_support = true,
3634 .ops = &mv88e6352_ops, 3691 .ops = &mv88e6352_ops,
3635 }, 3692 },
3636 [MV88E6390] = { 3693 [MV88E6390] = {
@@ -3639,6 +3696,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3639 .name = "Marvell 88E6390", 3696 .name = "Marvell 88E6390",
3640 .num_databases = 4096, 3697 .num_databases = 4096,
3641 .num_ports = 11, /* 10 + Z80 */ 3698 .num_ports = 11, /* 10 + Z80 */
3699 .num_gpio = 16,
3642 .max_vid = 8191, 3700 .max_vid = 8191,
3643 .port_base_addr = 0x0, 3701 .port_base_addr = 0x0,
3644 .global1_addr = 0x1b, 3702 .global1_addr = 0x1b,
@@ -3650,6 +3708,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3650 .pvt = true, 3708 .pvt = true,
3651 .multi_chip = true, 3709 .multi_chip = true,
3652 .tag_protocol = DSA_TAG_PROTO_DSA, 3710 .tag_protocol = DSA_TAG_PROTO_DSA,
3711 .ptp_support = true,
3653 .ops = &mv88e6390_ops, 3712 .ops = &mv88e6390_ops,
3654 }, 3713 },
3655 [MV88E6390X] = { 3714 [MV88E6390X] = {
@@ -3658,6 +3717,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3658 .name = "Marvell 88E6390X", 3717 .name = "Marvell 88E6390X",
3659 .num_databases = 4096, 3718 .num_databases = 4096,
3660 .num_ports = 11, /* 10 + Z80 */ 3719 .num_ports = 11, /* 10 + Z80 */
3720 .num_gpio = 16,
3661 .max_vid = 8191, 3721 .max_vid = 8191,
3662 .port_base_addr = 0x0, 3722 .port_base_addr = 0x0,
3663 .global1_addr = 0x1b, 3723 .global1_addr = 0x1b,
@@ -3669,6 +3729,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
3669 .pvt = true, 3729 .pvt = true,
3670 .multi_chip = true, 3730 .multi_chip = true,
3671 .tag_protocol = DSA_TAG_PROTO_DSA, 3731 .tag_protocol = DSA_TAG_PROTO_DSA,
3732 .ptp_support = true,
3672 .ops = &mv88e6390x_ops, 3733 .ops = &mv88e6390x_ops,
3673 }, 3734 },
3674}; 3735};
@@ -3880,6 +3941,11 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
3880 .port_mdb_del = mv88e6xxx_port_mdb_del, 3941 .port_mdb_del = mv88e6xxx_port_mdb_del,
3881 .crosschip_bridge_join = mv88e6xxx_crosschip_bridge_join, 3942 .crosschip_bridge_join = mv88e6xxx_crosschip_bridge_join,
3882 .crosschip_bridge_leave = mv88e6xxx_crosschip_bridge_leave, 3943 .crosschip_bridge_leave = mv88e6xxx_crosschip_bridge_leave,
3944 .port_hwtstamp_set = mv88e6xxx_port_hwtstamp_set,
3945 .port_hwtstamp_get = mv88e6xxx_port_hwtstamp_get,
3946 .port_txtstamp = mv88e6xxx_port_txtstamp,
3947 .port_rxtstamp = mv88e6xxx_port_rxtstamp,
3948 .get_ts_info = mv88e6xxx_get_ts_info,
3883}; 3949};
3884 3950
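These five hooks are what the DSA core uses to service the standard SIOCSHWTSTAMP/SIOCGHWTSTAMP ioctls and to divert PTP frames for timestamping. From userspace the ioctl side looks like the usual hwtstamp dance; a minimal sketch, where "lan0" and the socket setup are placeholders:

#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/sockios.h>
#include <linux/net_tstamp.h>

int enable_hwtstamp(int fd)
{
	struct hwtstamp_config cfg;
	struct ifreq ifr;

	memset(&cfg, 0, sizeof(cfg));
	cfg.tx_type = HWTSTAMP_TX_ON;			/* handled by port_hwtstamp_set */
	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;	/* the only filter class this driver keeps */

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "lan0", IFNAMSIZ - 1);	/* placeholder port name */
	ifr.ifr_data = (void *)&cfg;

	/* fd is any socket, e.g. socket(AF_INET, SOCK_DGRAM, 0) */
	return ioctl(fd, SIOCSHWTSTAMP, &ifr);
}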
3885static struct dsa_switch_driver mv88e6xxx_switch_drv = { 3951static struct dsa_switch_driver mv88e6xxx_switch_drv = {
@@ -4022,6 +4088,11 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev)
4022 struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); 4088 struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
4023 struct mv88e6xxx_chip *chip = ds->priv; 4089 struct mv88e6xxx_chip *chip = ds->priv;
4024 4090
4091 if (chip->info->ptp_support) {
4092 mv88e6xxx_hwtstamp_free(chip);
4093 mv88e6xxx_ptp_free(chip);
4094 }
4095
4025 mv88e6xxx_phy_destroy(chip); 4096 mv88e6xxx_phy_destroy(chip);
4026 mv88e6xxx_unregister_switch(chip); 4097 mv88e6xxx_unregister_switch(chip);
4027 mv88e6xxx_mdios_unregister(chip); 4098 mv88e6xxx_mdios_unregister(chip);
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 3dba6e90adcf..97d7915f32c7 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -16,6 +16,8 @@
16#include <linux/irq.h> 16#include <linux/irq.h>
17#include <linux/gpio/consumer.h> 17#include <linux/gpio/consumer.h>
18#include <linux/phy.h> 18#include <linux/phy.h>
19#include <linux/ptp_clock_kernel.h>
20#include <linux/timecounter.h>
19#include <net/dsa.h> 21#include <net/dsa.h>
20 22
21#ifndef UINT64_MAX 23#ifndef UINT64_MAX
@@ -39,6 +41,8 @@
39#define MV88E6XXX_MAX_PVT_SWITCHES 32 41#define MV88E6XXX_MAX_PVT_SWITCHES 32
40#define MV88E6XXX_MAX_PVT_PORTS 16 42#define MV88E6XXX_MAX_PVT_PORTS 16
41 43
44#define MV88E6XXX_MAX_GPIO 16
45
42enum mv88e6xxx_egress_mode { 46enum mv88e6xxx_egress_mode {
43 MV88E6XXX_EGRESS_MODE_UNMODIFIED, 47 MV88E6XXX_EGRESS_MODE_UNMODIFIED,
44 MV88E6XXX_EGRESS_MODE_UNTAGGED, 48 MV88E6XXX_EGRESS_MODE_UNTAGGED,
@@ -105,6 +109,7 @@ struct mv88e6xxx_info {
105 const char *name; 109 const char *name;
106 unsigned int num_databases; 110 unsigned int num_databases;
107 unsigned int num_ports; 111 unsigned int num_ports;
112 unsigned int num_gpio;
108 unsigned int max_vid; 113 unsigned int max_vid;
109 unsigned int port_base_addr; 114 unsigned int port_base_addr;
110 unsigned int global1_addr; 115 unsigned int global1_addr;
@@ -126,6 +131,9 @@ struct mv88e6xxx_info {
126 */ 131 */
127 u8 atu_move_port_mask; 132 u8 atu_move_port_mask;
128 const struct mv88e6xxx_ops *ops; 133 const struct mv88e6xxx_ops *ops;
134
135 /* Supports PTP */
136 bool ptp_support;
129}; 137};
130 138
131struct mv88e6xxx_atu_entry { 139struct mv88e6xxx_atu_entry {
@@ -146,6 +154,8 @@ struct mv88e6xxx_vtu_entry {
146 154
147struct mv88e6xxx_bus_ops; 155struct mv88e6xxx_bus_ops;
148struct mv88e6xxx_irq_ops; 156struct mv88e6xxx_irq_ops;
157struct mv88e6xxx_gpio_ops;
158struct mv88e6xxx_avb_ops;
149 159
150struct mv88e6xxx_irq { 160struct mv88e6xxx_irq {
151 u16 masked; 161 u16 masked;
@@ -154,6 +164,32 @@ struct mv88e6xxx_irq {
154 unsigned int nirqs; 164 unsigned int nirqs;
155}; 165};
156 166
167/* state flags for mv88e6xxx_port_hwtstamp::state */
168enum {
169 MV88E6XXX_HWTSTAMP_ENABLED,
170 MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
171};
172
173struct mv88e6xxx_port_hwtstamp {
174 /* Port index */
175 int port_id;
176
177 /* Timestamping state */
178 unsigned long state;
179
180 /* Resources for receive timestamping */
181 struct sk_buff_head rx_queue;
182 struct sk_buff_head rx_queue2;
183
184 /* Resources for transmit timestamping */
185 unsigned long tx_tstamp_start;
186 struct sk_buff *tx_skb;
187 u16 tx_seq_id;
188
189 /* Current timestamp configuration */
190 struct hwtstamp_config tstamp_config;
191};
192
157struct mv88e6xxx_chip { 193struct mv88e6xxx_chip {
158 const struct mv88e6xxx_info *info; 194 const struct mv88e6xxx_info *info;
159 195
@@ -209,6 +245,26 @@ struct mv88e6xxx_chip {
209 int watchdog_irq; 245 int watchdog_irq;
210 int atu_prob_irq; 246 int atu_prob_irq;
211 int vtu_prob_irq; 247 int vtu_prob_irq;
248
249 /* GPIO resources */
250 u8 gpio_data[2];
251
252 /* This cyclecounter abstracts the switch PTP time.
253 * reg_lock must be held for any operation that read()s.
254 */
255 struct cyclecounter tstamp_cc;
256 struct timecounter tstamp_tc;
257 struct delayed_work overflow_work;
258
259 struct ptp_clock *ptp_clock;
260 struct ptp_clock_info ptp_clock_info;
261 struct delayed_work tai_event_work;
262 struct ptp_pin_desc pin_config[MV88E6XXX_MAX_GPIO];
263 u16 trig_config;
264 u16 evcap_config;
265
266 /* Per-port timestamping resources. */
267 struct mv88e6xxx_port_hwtstamp port_hwtstamp[DSA_MAX_PORTS];
212}; 268};
213 269
214struct mv88e6xxx_bus_ops { 270struct mv88e6xxx_bus_ops {
@@ -344,6 +400,12 @@ struct mv88e6xxx_ops {
344 struct mv88e6xxx_vtu_entry *entry); 400 struct mv88e6xxx_vtu_entry *entry);
345 int (*vtu_loadpurge)(struct mv88e6xxx_chip *chip, 401 int (*vtu_loadpurge)(struct mv88e6xxx_chip *chip,
346 struct mv88e6xxx_vtu_entry *entry); 402 struct mv88e6xxx_vtu_entry *entry);
403
404 /* GPIO operations */
405 const struct mv88e6xxx_gpio_ops *gpio_ops;
406
407 /* Interface to the AVB/PTP registers */
408 const struct mv88e6xxx_avb_ops *avb_ops;
347}; 409};
348 410
349struct mv88e6xxx_irq_ops { 411struct mv88e6xxx_irq_ops {
@@ -355,6 +417,42 @@ struct mv88e6xxx_irq_ops {
355 void (*irq_free)(struct mv88e6xxx_chip *chip); 417 void (*irq_free)(struct mv88e6xxx_chip *chip);
356}; 418};
357 419
420struct mv88e6xxx_gpio_ops {
421 /* Get/set data on GPIO pin */
422 int (*get_data)(struct mv88e6xxx_chip *chip, unsigned int pin);
423 int (*set_data)(struct mv88e6xxx_chip *chip, unsigned int pin,
424 int value);
425
426 /* get/set GPIO direction */
427 int (*get_dir)(struct mv88e6xxx_chip *chip, unsigned int pin);
428 int (*set_dir)(struct mv88e6xxx_chip *chip, unsigned int pin,
429 bool input);
430
431 /* get/set GPIO pin control */
432 int (*get_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin,
433 int *func);
434 int (*set_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin,
435 int func);
436};
437
438struct mv88e6xxx_avb_ops {
439 /* Access port-scoped Precision Time Protocol registers */
440 int (*port_ptp_read)(struct mv88e6xxx_chip *chip, int port, int addr,
441 u16 *data, int len);
442 int (*port_ptp_write)(struct mv88e6xxx_chip *chip, int port, int addr,
443 u16 data);
444
445 /* Access global Precision Time Protocol registers */
446 int (*ptp_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data,
447 int len);
448 int (*ptp_write)(struct mv88e6xxx_chip *chip, int addr, u16 data);
449
450 /* Access global Time Application Interface registers */
451 int (*tai_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data,
452 int len);
453 int (*tai_write)(struct mv88e6xxx_chip *chip, int addr, u16 data);
454};
455
358#define STATS_TYPE_PORT BIT(0) 456#define STATS_TYPE_PORT BIT(0)
359#define STATS_TYPE_BANK0 BIT(1) 457#define STATS_TYPE_BANK0 BIT(1)
360#define STATS_TYPE_BANK1 BIT(2) 458#define STATS_TYPE_BANK1 BIT(2)
@@ -386,6 +484,11 @@ static inline u16 mv88e6xxx_port_mask(struct mv88e6xxx_chip *chip)
386 return GENMASK(mv88e6xxx_num_ports(chip) - 1, 0); 484 return GENMASK(mv88e6xxx_num_ports(chip) - 1, 0);
387} 485}
388 486
487static inline unsigned int mv88e6xxx_num_gpio(struct mv88e6xxx_chip *chip)
488{
489 return chip->info->num_gpio;
490}
491
389int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); 492int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
390int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); 493int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
391int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, 494int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index af0727877825..5f370f1fc7c4 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -20,22 +20,22 @@
20#include "global1.h" /* for MV88E6XXX_G1_STS_IRQ_DEVICE */ 20#include "global1.h" /* for MV88E6XXX_G1_STS_IRQ_DEVICE */
21#include "global2.h" 21#include "global2.h"
22 22
23static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) 23int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
24{ 24{
25 return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val); 25 return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val);
26} 26}
27 27
28static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val) 28int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
29{ 29{
30 return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val); 30 return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val);
31} 31}
32 32
33static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update) 33int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
34{ 34{
35 return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update); 35 return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update);
36} 36}
37 37
38static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) 38int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
39{ 39{
40 return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask); 40 return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask);
41} 41}
@@ -798,6 +798,7 @@ int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, struct mii_bus *bus,
798 val); 798 val);
799} 799}
800 800
801/* Offset 0x1B: Watchdog Control */
801static int mv88e6097_watchdog_action(struct mv88e6xxx_chip *chip, int irq) 802static int mv88e6097_watchdog_action(struct mv88e6xxx_chip *chip, int irq)
802{ 803{
803 u16 reg; 804 u16 reg;
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index 669f59017b12..25f92b3d7157 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -149,7 +149,26 @@
149#define MV88E6390_G2_EEPROM_ADDR_MASK 0xffff 149#define MV88E6390_G2_EEPROM_ADDR_MASK 0xffff
150 150
151/* Offset 0x16: AVB Command Register */ 151/* Offset 0x16: AVB Command Register */
152#define MV88E6352_G2_AVB_CMD 0x16 152#define MV88E6352_G2_AVB_CMD 0x16
153#define MV88E6352_G2_AVB_CMD_BUSY 0x8000
154#define MV88E6352_G2_AVB_CMD_OP_READ 0x4000
155#define MV88E6352_G2_AVB_CMD_OP_READ_INCR 0x6000
156#define MV88E6352_G2_AVB_CMD_OP_WRITE 0x3000
157#define MV88E6390_G2_AVB_CMD_OP_READ 0x0000
158#define MV88E6390_G2_AVB_CMD_OP_READ_INCR 0x4000
159#define MV88E6390_G2_AVB_CMD_OP_WRITE 0x6000
160#define MV88E6352_G2_AVB_CMD_PORT_MASK 0x0f00
161#define MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL 0xe
162#define MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL 0xf
163#define MV88E6390_G2_AVB_CMD_PORT_MASK 0x1f00
164#define MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL 0x1e
165#define MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL 0x1f
166#define MV88E6352_G2_AVB_CMD_BLOCK_PTP 0
167#define MV88E6352_G2_AVB_CMD_BLOCK_AVB 1
168#define MV88E6352_G2_AVB_CMD_BLOCK_QAV 2
169#define MV88E6352_G2_AVB_CMD_BLOCK_QVB 3
170#define MV88E6352_G2_AVB_CMD_BLOCK_MASK 0x00e0
171#define MV88E6352_G2_AVB_CMD_ADDR_MASK 0x001f
153 172
154/* Offset 0x17: AVB Data Register */ 173/* Offset 0x17: AVB Data Register */
155#define MV88E6352_G2_AVB_DATA 0x17 174#define MV88E6352_G2_AVB_DATA 0x17
@@ -223,6 +242,35 @@
223#define MV88E6352_G2_NOEGR_POLICY 0x2000 242#define MV88E6352_G2_NOEGR_POLICY 0x2000
224#define MV88E6390_G2_LAG_ID_4 0x2000 243#define MV88E6390_G2_LAG_ID_4 0x2000
225 244
245/* Scratch/Misc registers accessed through MV88E6XXX_G2_SCRATCH_MISC */
246/* Offset 0x02: Misc Configuration */
247#define MV88E6352_G2_SCRATCH_MISC_CFG 0x02
248#define MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI 0x80
249/* Offset 0x60-0x61: GPIO Configuration */
250#define MV88E6352_G2_SCRATCH_GPIO_CFG0 0x60
251#define MV88E6352_G2_SCRATCH_GPIO_CFG1 0x61
252/* Offset 0x62-0x63: GPIO Direction */
253#define MV88E6352_G2_SCRATCH_GPIO_DIR0 0x62
254#define MV88E6352_G2_SCRATCH_GPIO_DIR1 0x63
255#define MV88E6352_G2_SCRATCH_GPIO_DIR_OUT 0
256#define MV88E6352_G2_SCRATCH_GPIO_DIR_IN 1
257/* Offset 0x64-0x65: GPIO Data */
258#define MV88E6352_G2_SCRATCH_GPIO_DATA0 0x64
259#define MV88E6352_G2_SCRATCH_GPIO_DATA1 0x65
260/* Offset 0x68-0x6F: GPIO Pin Control */
261#define MV88E6352_G2_SCRATCH_GPIO_PCTL0 0x68
262#define MV88E6352_G2_SCRATCH_GPIO_PCTL1 0x69
263#define MV88E6352_G2_SCRATCH_GPIO_PCTL2 0x6A
264#define MV88E6352_G2_SCRATCH_GPIO_PCTL3 0x6B
265#define MV88E6352_G2_SCRATCH_GPIO_PCTL4 0x6C
266#define MV88E6352_G2_SCRATCH_GPIO_PCTL5 0x6D
267#define MV88E6352_G2_SCRATCH_GPIO_PCTL6 0x6E
268#define MV88E6352_G2_SCRATCH_GPIO_PCTL7 0x6F
269
270#define MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO 0
271#define MV88E6352_G2_SCRATCH_GPIO_PCTL_TRIG 1
272#define MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ 2
273
226#ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 274#ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2
227 275
228static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) 276static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
@@ -230,6 +278,11 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
230 return 0; 278 return 0;
231} 279}
232 280
281int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val);
282int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val);
283int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update);
284int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask);
285
233int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port); 286int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
234int mv88e6390_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port); 287int mv88e6390_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
235 288
@@ -267,6 +320,11 @@ int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip);
267extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops; 320extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
268extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops; 321extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
269 322
323extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops;
324extern const struct mv88e6xxx_avb_ops mv88e6390_avb_ops;
325
326extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops;
327
270#else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ 328#else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
271 329
272static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) 330static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
@@ -279,6 +337,26 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
279 return 0; 337 return 0;
280} 338}
281 339
340static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
341{
342 return -EOPNOTSUPP;
343}
344
345static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
346{
347 return -EOPNOTSUPP;
348}
349
350static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
351{
352 return -EOPNOTSUPP;
353}
354
355static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
356{
357 return -EOPNOTSUPP;
358}
359
282static inline int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, 360static inline int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip,
283 int port) 361 int port)
284{ 362{
@@ -382,6 +460,11 @@ static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
382static const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops = {}; 460static const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops = {};
383static const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {}; 461static const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {};
384 462
463static const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {};
464static const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {};
465
466static const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {};
467
385#endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ 468#endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
386 469
387#endif /* _MV88E6XXX_GLOBAL2_H */ 470#endif /* _MV88E6XXX_GLOBAL2_H */
diff --git a/drivers/net/dsa/mv88e6xxx/global2_avb.c b/drivers/net/dsa/mv88e6xxx/global2_avb.c
new file mode 100644
index 000000000000..2e398ccb88ca
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global2_avb.c
@@ -0,0 +1,193 @@
1/*
2 * Marvell 88E6xxx Switch Global 2 Registers support
3 *
4 * Copyright (c) 2008 Marvell Semiconductor
5 *
6 * Copyright (c) 2016-2017 Savoir-faire Linux Inc.
7 * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
8 *
9 * Copyright (c) 2017 National Instruments
10 * Brandon Streiff <brandon.streiff@ni.com>
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 */
17
18#include "global2.h"
19
20/* Offset 0x16: AVB Command Register
21 * Offset 0x17: AVB Data Register
22 *
23 * There are two different versions of this register interface:
24 * "6352": 3-bit "op" field, 4-bit "port" field.
25 * "6390": 2-bit "op" field, 5-bit "port" field.
26 *
27 * The "op" codes are different between the two, as well as the special
28 * port fields for global PTP and TAI configuration.
29 */
30
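As a concrete illustration of the 6352-style encoding, the command word for a single-register read of a port's PTP block could be assembled as below. This is a sketch built from the MV88E6352_G2_AVB_CMD_* definitions in global2.h; the helper name is hypothetical, and mv88e6xxx_g2_update() is assumed to supply the BUSY bit when issuing the command:

static u16 mv88e6352_avb_readop(int port, int addr)
{
	/* 6352 layout: [15]=BUSY [14:12]=op [11:8]=port [7:5]=block [4:0]=addr */
	return MV88E6352_G2_AVB_CMD_OP_READ |			/* 0x4000 */
	       (port << 8) |					/* 4-bit port field */
	       (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |		/* block 0: PTP */
	       (addr & MV88E6352_G2_AVB_CMD_ADDR_MASK);		/* 5-bit register address */
}

/* The 6390 layout differs: op sits in bits [14:13] (OP_READ is 0x0000) and the
 * port field widens to bits [12:8], so the global TAI "port" becomes 0x1e
 * instead of 0xe.
 */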
31/* mv88e6xxx_g2_avb_read -- Read one or multiple 16-bit words.
32 * The hardware supports snapshotting up to four contiguous registers.
33 */
34static int mv88e6xxx_g2_avb_read(struct mv88e6xxx_chip *chip, u16 readop,
35 u16 *data, int len)
36{
37 int err;
38 int i;
39
40 /* Hardware can only snapshot four words. */
41 if (len > 4)
42 return -E2BIG;
43
44 err = mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, readop);
45 if (err)
46 return err;
47
48 for (i = 0; i < len; ++i) {
49 err = mv88e6xxx_g2_read(chip, MV88E6352_G2_AVB_DATA,
50 &data[i]);
51 if (err)
52 return err;
53 }
54
55 return 0;
56}
57
58/* mv88e6xxx_g2_avb_write -- Write one 16-bit word. */
59static int mv88e6xxx_g2_avb_write(struct mv88e6xxx_chip *chip, u16 writeop,
60 u16 data)
61{
62 int err;
63
64 err = mv88e6xxx_g2_write(chip, MV88E6352_G2_AVB_DATA, data);
65 if (err)
66 return err;
67
68 return mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, writeop);
69}
70
71static int mv88e6352_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
72 int port, int addr, u16 *data,
73 int len)
74{
75 u16 readop = (len == 1 ? MV88E6352_G2_AVB_CMD_OP_READ :
76 MV88E6352_G2_AVB_CMD_OP_READ_INCR) |
77 (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
78 addr;
79
80 return mv88e6xxx_g2_avb_read(chip, readop, data, len);
81}
82
83static int mv88e6352_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip,
84 int port, int addr, u16 data)
85{
86 u16 writeop = MV88E6352_G2_AVB_CMD_OP_WRITE | (port << 8) |
87 (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr;
88
89 return mv88e6xxx_g2_avb_write(chip, writeop, data);
90}
91
92static int mv88e6352_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr,
93 u16 *data, int len)
94{
95 return mv88e6352_g2_avb_port_ptp_read(chip,
96 MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL,
97 addr, data, len);
98}
99
100static int mv88e6352_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr,
101 u16 data)
102{
103 return mv88e6352_g2_avb_port_ptp_write(chip,
104 MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL,
105 addr, data);
106}
107
108static int mv88e6352_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr,
109 u16 *data, int len)
110{
111 return mv88e6352_g2_avb_port_ptp_read(chip,
112 MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL,
113 addr, data, len);
114}
115
116static int mv88e6352_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr,
117 u16 data)
118{
119 return mv88e6352_g2_avb_port_ptp_write(chip,
120 MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL,
121 addr, data);
122}
123
124const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {
125 .port_ptp_read = mv88e6352_g2_avb_port_ptp_read,
126 .port_ptp_write = mv88e6352_g2_avb_port_ptp_write,
127 .ptp_read = mv88e6352_g2_avb_ptp_read,
128 .ptp_write = mv88e6352_g2_avb_ptp_write,
129 .tai_read = mv88e6352_g2_avb_tai_read,
130 .tai_write = mv88e6352_g2_avb_tai_write,
131};
132
133static int mv88e6390_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
134 int port, int addr, u16 *data,
135 int len)
136{
137 u16 readop = (len == 1 ? MV88E6390_G2_AVB_CMD_OP_READ :
138 MV88E6390_G2_AVB_CMD_OP_READ_INCR) |
139 (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
140 addr;
141
142 return mv88e6xxx_g2_avb_read(chip, readop, data, len);
143}
144
145static int mv88e6390_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip,
146 int port, int addr, u16 data)
147{
148 u16 writeop = MV88E6390_G2_AVB_CMD_OP_WRITE | (port << 8) |
149 (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr;
150
151 return mv88e6xxx_g2_avb_write(chip, writeop, data);
152}
153
154static int mv88e6390_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr,
155 u16 *data, int len)
156{
157 return mv88e6390_g2_avb_port_ptp_read(chip,
158 MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL,
159 addr, data, len);
160}
161
162static int mv88e6390_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr,
163 u16 data)
164{
165 return mv88e6390_g2_avb_port_ptp_write(chip,
166 MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL,
167 addr, data);
168}
169
170static int mv88e6390_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr,
171 u16 *data, int len)
172{
173 return mv88e6390_g2_avb_port_ptp_read(chip,
174 MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL,
175 addr, data, len);
176}
177
178static int mv88e6390_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr,
179 u16 data)
180{
181 return mv88e6390_g2_avb_port_ptp_write(chip,
182 MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL,
183 addr, data);
184}
185
186const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {
187 .port_ptp_read = mv88e6390_g2_avb_port_ptp_read,
188 .port_ptp_write = mv88e6390_g2_avb_port_ptp_write,
189 .ptp_read = mv88e6390_g2_avb_ptp_read,
190 .ptp_write = mv88e6390_g2_avb_ptp_write,
191 .tai_read = mv88e6390_g2_avb_tai_read,
192 .tai_write = mv88e6390_g2_avb_tai_write,
193};
diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
new file mode 100644
index 000000000000..0ff12bff9f0e
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
@@ -0,0 +1,240 @@
1/*
2 * Marvell 88E6xxx Switch Global 2 Scratch & Misc Registers support
3 *
4 * Copyright (c) 2008 Marvell Semiconductor
5 *
6 * Copyright (c) 2017 National Instruments
7 * Brandon Streiff <brandon.streiff@ni.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 */
14
15#include "chip.h"
16#include "global2.h"
17
18/* Offset 0x1A: Scratch and Misc. Register */
19static int mv88e6xxx_g2_scratch_read(struct mv88e6xxx_chip *chip, int reg,
20 u8 *data)
21{
22 u16 value;
23 int err;
24
25 err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC,
26 reg << 8);
27 if (err)
28 return err;
29
30 err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, &value);
31 if (err)
32 return err;
33
34 *data = (value & MV88E6XXX_G2_SCRATCH_MISC_DATA_MASK);
35
36 return 0;
37}
38
39static int mv88e6xxx_g2_scratch_write(struct mv88e6xxx_chip *chip, int reg,
40 u8 data)
41{
42 u16 value = (reg << 8) | data;
43
44 return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, value);
45}
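The Scratch and Misc. register is a pointer/data window: writing (reg << 8) with the update bit clear selects a byte register, whose value can then be read back from the low data bits, while mv88e6xxx_g2_update() performs the write side. For the bit helpers that follow, the byte register and mask for a pin fall out as in this worked example:

	/* e.g. GPIO pin 10:
	 *   reg  = MV88E6352_G2_SCRATCH_GPIO_DATA0 + 10 / 8  = GPIO_DATA1 (0x65)
	 *   mask = 1 << (10 & 0x7)                           = 0x04
	 */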
46
 47/**
 48 * mv88e6xxx_g2_scratch_get_bit - get a bit
 49 * @chip: chip private data
 50 * @base_reg: base of scratch bit registers
 51 * @offset: bit index; @set: returns whether the bit is set
 52 */
53static int mv88e6xxx_g2_scratch_get_bit(struct mv88e6xxx_chip *chip,
54 int base_reg, unsigned int offset,
55 int *set)
56{
57 int reg = base_reg + (offset / 8);
58 u8 mask = (1 << (offset & 0x7));
59 u8 val;
60 int err;
61
62 err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
63 if (err)
64 return err;
65
66 *set = !!(mask & val);
67
68 return 0;
69}
70
 71/**
 72 * mv88e6xxx_g2_scratch_set_bit - set (or clear) a bit
 73 * @chip: chip private data
 74 * @base_reg: base of scratch bit registers; @offset: bit index
 75 * @set: set if true, clear if false
 76 *
 77 * Helper function for dealing with the direction and data registers.
 78 */
79static int mv88e6xxx_g2_scratch_set_bit(struct mv88e6xxx_chip *chip,
80 int base_reg, unsigned int offset,
81 int set)
82{
83 int reg = base_reg + (offset / 8);
84 u8 mask = (1 << (offset & 0x7));
85 u8 val;
86 int err;
87
88 err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
89 if (err)
90 return err;
91
92 if (set)
93 val |= mask;
94 else
95 val &= ~mask;
96
97 return mv88e6xxx_g2_scratch_write(chip, reg, val);
98}
99
100/**
101 * mv88e6352_g2_scratch_gpio_get_data - get data on gpio pin
102 * @chip: chip private data
103 * @pin: gpio index
104 *
105 * Return: 0 for low, 1 for high, negative error
106 */
107static int mv88e6352_g2_scratch_gpio_get_data(struct mv88e6xxx_chip *chip,
108 unsigned int pin)
109{
110 int val = 0;
111 int err;
112
113 err = mv88e6xxx_g2_scratch_get_bit(chip,
114 MV88E6352_G2_SCRATCH_GPIO_DATA0,
115 pin, &val);
116 if (err)
117 return err;
118
119 return val;
120}
121
122/**
123 * mv88e6352_g2_scratch_gpio_set_data - set data on gpio pin
124 * @chip: chip private data
125 * @pin: gpio index
126 * @value: value to set
127 */
128static int mv88e6352_g2_scratch_gpio_set_data(struct mv88e6xxx_chip *chip,
129 unsigned int pin, int value)
130{
131 u8 mask = (1 << (pin & 0x7));
132 int offset = (pin / 8);
133 int reg;
134
135 reg = MV88E6352_G2_SCRATCH_GPIO_DATA0 + offset;
136
137 if (value)
138 chip->gpio_data[offset] |= mask;
139 else
140 chip->gpio_data[offset] &= ~mask;
141
142 return mv88e6xxx_g2_scratch_write(chip, reg, chip->gpio_data[offset]);
143}
144
145/**
146 * mv88e6352_g2_scratch_gpio_get_dir - get direction of gpio pin
147 * @chip: chip private data
148 * @pin: gpio index
149 *
150 * Return: 0 for output, 1 for input (same as GPIOF_DIR_XXX).
151 */
152static int mv88e6352_g2_scratch_gpio_get_dir(struct mv88e6xxx_chip *chip,
153 unsigned int pin)
154{
155 int val = 0;
156 int err;
157
158 err = mv88e6xxx_g2_scratch_get_bit(chip,
159 MV88E6352_G2_SCRATCH_GPIO_DIR0,
160 pin, &val);
161 if (err)
162 return err;
163
164 return val;
165}
166
167/**
168 * mv88e6352_g2_scratch_gpio_set_dir - set direction of gpio pin
169 * @chip: chip private data
 170 * @pin: gpio index; @input: true for input, false for output
171 */
172static int mv88e6352_g2_scratch_gpio_set_dir(struct mv88e6xxx_chip *chip,
173 unsigned int pin, bool input)
174{
175 int value = (input ? MV88E6352_G2_SCRATCH_GPIO_DIR_IN :
176 MV88E6352_G2_SCRATCH_GPIO_DIR_OUT);
177
178 return mv88e6xxx_g2_scratch_set_bit(chip,
179 MV88E6352_G2_SCRATCH_GPIO_DIR0,
180 pin, value);
181}
182
183/**
184 * mv88e6352_g2_scratch_gpio_get_pctl - get pin control setting
185 * @chip: chip private data
186 * @pin: gpio index
 187 * @func: returned function number
188 *
189 * Note that the function numbers themselves may vary by chipset.
190 */
191static int mv88e6352_g2_scratch_gpio_get_pctl(struct mv88e6xxx_chip *chip,
192 unsigned int pin, int *func)
193{
194 int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2);
195 int offset = (pin & 0x1) ? 4 : 0;
196 u8 mask = (0x7 << offset);
197 int err;
198 u8 val;
199
200 err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
201 if (err)
202 return err;
203
204 *func = (val & mask) >> offset;
205
206 return 0;
207}
208
209/**
210 * mv88e6352_g2_scratch_gpio_set_pctl - set pin control setting
211 * @chip: chip private data
212 * @pin: gpio index
213 * @func: function number
214 */
215static int mv88e6352_g2_scratch_gpio_set_pctl(struct mv88e6xxx_chip *chip,
216 unsigned int pin, int func)
217{
218 int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2);
219 int offset = (pin & 0x1) ? 4 : 0;
220 u8 mask = (0x7 << offset);
221 int err;
222 u8 val;
223
224 err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
225 if (err)
226 return err;
227
 228 val = (val & ~mask) | ((func << offset) & mask);
229
230 return mv88e6xxx_g2_scratch_write(chip, reg, val);
231}
232
233const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {
234 .get_data = mv88e6352_g2_scratch_gpio_get_data,
235 .set_data = mv88e6352_g2_scratch_gpio_set_data,
236 .get_dir = mv88e6352_g2_scratch_gpio_get_dir,
237 .set_dir = mv88e6352_g2_scratch_gpio_set_dir,
238 .get_pctl = mv88e6352_g2_scratch_gpio_get_pctl,
239 .set_pctl = mv88e6352_g2_scratch_gpio_set_pctl,
240};
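These ops back the ptp_pin_desc table declared in chip.h (pin_config), so once the PTP clock is registered the pins should be reachable through the generic PTP character device. A hedged userspace sketch; the device path and pin index are placeholders, and the exact function routing depends on how the PTP layer wires these ops up:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ptp_clock.h>

int route_pin_to_extts(const char *dev, unsigned int pin)
{
	struct ptp_pin_desc desc;
	int fd, ret;

	fd = open(dev, O_RDWR);		/* e.g. "/dev/ptp1"; placeholder */
	if (fd < 0)
		return -1;

	memset(&desc, 0, sizeof(desc));
	desc.index = pin;		/* GPIO pin, 0 .. num_gpio - 1 */
	desc.func = PTP_PF_EXTTS;	/* external timestamp function */
	desc.chan = 0;

	ret = ioctl(fd, PTP_PIN_SETFUNC, &desc);
	close(fd);
	return ret;
}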
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
new file mode 100644
index 000000000000..ac7694c71266
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
@@ -0,0 +1,576 @@
1/*
2 * Marvell 88E6xxx Switch hardware timestamping support
3 *
4 * Copyright (c) 2008 Marvell Semiconductor
5 *
6 * Copyright (c) 2017 National Instruments
7 * Erik Hons <erik.hons@ni.com>
8 * Brandon Streiff <brandon.streiff@ni.com>
9 * Dane Wagner <dane.wagner@ni.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 */
16
17#include "chip.h"
18#include "global2.h"
19#include "hwtstamp.h"
20#include "ptp.h"
21#include <linux/ptp_classify.h>
22
23#define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb))
24
25static int mv88e6xxx_port_ptp_read(struct mv88e6xxx_chip *chip, int port,
26 int addr, u16 *data, int len)
27{
28 if (!chip->info->ops->avb_ops->port_ptp_read)
29 return -EOPNOTSUPP;
30
31 return chip->info->ops->avb_ops->port_ptp_read(chip, port, addr,
32 data, len);
33}
34
35static int mv88e6xxx_port_ptp_write(struct mv88e6xxx_chip *chip, int port,
36 int addr, u16 data)
37{
38 if (!chip->info->ops->avb_ops->port_ptp_write)
39 return -EOPNOTSUPP;
40
41 return chip->info->ops->avb_ops->port_ptp_write(chip, port, addr,
42 data);
43}
44
45static int mv88e6xxx_ptp_write(struct mv88e6xxx_chip *chip, int addr,
46 u16 data)
47{
48 if (!chip->info->ops->avb_ops->ptp_write)
49 return -EOPNOTSUPP;
50
51 return chip->info->ops->avb_ops->ptp_write(chip, addr, data);
52}
53
54/* TX_TSTAMP_TIMEOUT: This limits the time spent polling for a TX
55 * timestamp. When working properly, hardware will produce a timestamp
 56 * within 1ms. Software may encounter delays due to MDIO contention, so
57 * the timeout is set accordingly.
58 */
59#define TX_TSTAMP_TIMEOUT msecs_to_jiffies(20)
60
61int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
62 struct ethtool_ts_info *info)
63{
64 struct mv88e6xxx_chip *chip = ds->priv;
65
66 if (!chip->info->ptp_support)
67 return -EOPNOTSUPP;
68
69 info->so_timestamping =
70 SOF_TIMESTAMPING_TX_HARDWARE |
71 SOF_TIMESTAMPING_RX_HARDWARE |
72 SOF_TIMESTAMPING_RAW_HARDWARE;
73 info->phc_index = ptp_clock_index(chip->ptp_clock);
74 info->tx_types =
75 (1 << HWTSTAMP_TX_OFF) |
76 (1 << HWTSTAMP_TX_ON);
77 info->rx_filters =
78 (1 << HWTSTAMP_FILTER_NONE) |
79 (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
80 (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
81 (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
82 (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
83 (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
84 (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
85 (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
86 (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
87 (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ);
88
89 return 0;
90}
91
92static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port,
93 struct hwtstamp_config *config)
94{
95 struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
96 bool tstamp_enable = false;
97 u16 port_config0;
98 int err;
99
100 /* Prevent the TX/RX paths from trying to interact with the
101 * timestamp hardware while we reconfigure it.
102 */
103 clear_bit_unlock(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
104
105 /* reserved for future extensions */
106 if (config->flags)
107 return -EINVAL;
108
109 switch (config->tx_type) {
110 case HWTSTAMP_TX_OFF:
111 tstamp_enable = false;
112 break;
113 case HWTSTAMP_TX_ON:
114 tstamp_enable = true;
115 break;
116 default:
117 return -ERANGE;
118 }
119
120 /* The switch supports timestamping both L2 and L4; one cannot be
121 * disabled independently of the other.
122 */
123 switch (config->rx_filter) {
124 case HWTSTAMP_FILTER_NONE:
125 tstamp_enable = false;
126 break;
127 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
128 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
129 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
130 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
131 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
132 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
133 case HWTSTAMP_FILTER_PTP_V2_EVENT:
134 case HWTSTAMP_FILTER_PTP_V2_SYNC:
135 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
136 config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
137 break;
138 case HWTSTAMP_FILTER_ALL:
139 default:
140 config->rx_filter = HWTSTAMP_FILTER_NONE;
141 return -ERANGE;
142 }
143
144 if (tstamp_enable) {
145 /* Disable transportSpecific value matching, so that packets
 146 * with either 1588 (0) or 802.1AS (1) will be timestamped.
147 */
148 port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH;
149 } else {
150 /* Disable PTP. This disables both RX and TX timestamping. */
151 port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP;
152 }
153
154 mutex_lock(&chip->reg_lock);
155 err = mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
156 port_config0);
157 mutex_unlock(&chip->reg_lock);
158
159 if (err < 0)
160 return err;
161
162 /* Once hardware has been configured, enable timestamp checks
163 * in the RX/TX paths.
164 */
165 if (tstamp_enable)
166 set_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
167
168 return 0;
169}
170
171int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
172 struct ifreq *ifr)
173{
174 struct mv88e6xxx_chip *chip = ds->priv;
175 struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
176 struct hwtstamp_config config;
177 int err;
178
179 if (!chip->info->ptp_support)
180 return -EOPNOTSUPP;
181
182 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
183 return -EFAULT;
184
185 err = mv88e6xxx_set_hwtstamp_config(chip, port, &config);
186 if (err)
187 return err;
188
189 /* Save the chosen configuration to be returned later. */
190 memcpy(&ps->tstamp_config, &config, sizeof(config));
191
192 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
193 -EFAULT : 0;
194}
195
196int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
197 struct ifreq *ifr)
198{
199 struct mv88e6xxx_chip *chip = ds->priv;
200 struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
201 struct hwtstamp_config *config = &ps->tstamp_config;
202
203 if (!chip->info->ptp_support)
204 return -EOPNOTSUPP;
205
206 return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
207 -EFAULT : 0;
208}
209
210/* Get the start of the PTP header in this skb */
211static u8 *parse_ptp_header(struct sk_buff *skb, unsigned int type)
212{
213 u8 *data = skb_mac_header(skb);
214 unsigned int offset = 0;
215
216 if (type & PTP_CLASS_VLAN)
217 offset += VLAN_HLEN;
218
219 switch (type & PTP_CLASS_PMASK) {
220 case PTP_CLASS_IPV4:
221 offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
222 break;
223 case PTP_CLASS_IPV6:
224 offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
225 break;
226 case PTP_CLASS_L2:
227 offset += ETH_HLEN;
228 break;
229 default:
230 return NULL;
231 }
232
233 /* Ensure that the entire header is present in this packet. */
234 if (skb->len + ETH_HLEN < offset + 34)
235 return NULL;
236
237 return data + offset;
238}
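For an untagged IPv4 event message the offset works out to ETH_HLEN (14) + the IPv4 header length read from the packet via IPV4_HLEN() (20 bytes minimum) + UDP_HLEN (8), i.e. at least 42 bytes; the length check that follows then requires a further 34 bytes, the size of a PTPv2 common header, to be present beyond that offset.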
239
240/* Returns a pointer to the PTP header if the caller should time stamp,
241 * or NULL if the caller should not.
242 */
243static u8 *mv88e6xxx_should_tstamp(struct mv88e6xxx_chip *chip, int port,
244 struct sk_buff *skb, unsigned int type)
245{
246 struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
247 u8 *hdr;
248
249 if (!chip->info->ptp_support)
250 return NULL;
251
252 hdr = parse_ptp_header(skb, type);
253 if (!hdr)
254 return NULL;
255
256 if (!test_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state))
257 return NULL;
258
259 return hdr;
260}
261
262static int mv88e6xxx_ts_valid(u16 status)
263{
264 if (!(status & MV88E6XXX_PTP_TS_VALID))
265 return 0;
266 if (status & MV88E6XXX_PTP_TS_STATUS_MASK)
267 return 0;
268 return 1;
269}
270
271static int seq_match(struct sk_buff *skb, u16 ts_seqid)
272{
273 unsigned int type = SKB_PTP_TYPE(skb);
274 u8 *hdr = parse_ptp_header(skb, type);
275 __be16 *seqid;
276
277 seqid = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
278
279 return ts_seqid == ntohs(*seqid);
280}
281
282static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip,
283 struct mv88e6xxx_port_hwtstamp *ps,
284 struct sk_buff *skb, u16 reg,
285 struct sk_buff_head *rxq)
286{
287 u16 buf[4] = { 0 }, status, seq_id;
288 u64 ns, timelo, timehi;
289 struct skb_shared_hwtstamps *shwt;
290 int err;
291
292 mutex_lock(&chip->reg_lock);
293 err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
294 reg, buf, ARRAY_SIZE(buf));
295 mutex_unlock(&chip->reg_lock);
296 if (err)
297 pr_err("failed to get the receive time stamp\n");
298
299 status = buf[0];
300 timelo = buf[1];
301 timehi = buf[2];
302 seq_id = buf[3];
303
304 if (status & MV88E6XXX_PTP_TS_VALID) {
305 mutex_lock(&chip->reg_lock);
306 err = mv88e6xxx_port_ptp_write(chip, ps->port_id, reg, 0);
307 mutex_unlock(&chip->reg_lock);
308 if (err)
309 pr_err("failed to clear the receive status\n");
310 }
311 /* Since the device can only handle one time stamp at a time,
312 * we purge any extra frames from the queue.
313 */
314 for ( ; skb; skb = skb_dequeue(rxq)) {
315 if (mv88e6xxx_ts_valid(status) && seq_match(skb, seq_id)) {
316 ns = timehi << 16 | timelo;
317
318 mutex_lock(&chip->reg_lock);
319 ns = timecounter_cyc2time(&chip->tstamp_tc, ns);
320 mutex_unlock(&chip->reg_lock);
321 shwt = skb_hwtstamps(skb);
322 memset(shwt, 0, sizeof(*shwt));
323 shwt->hwtstamp = ns_to_ktime(ns);
324 status &= ~MV88E6XXX_PTP_TS_VALID;
325 }
326 netif_rx_ni(skb);
327 }
328}
329
330static void mv88e6xxx_rxtstamp_work(struct mv88e6xxx_chip *chip,
331 struct mv88e6xxx_port_hwtstamp *ps)
332{
333 struct sk_buff *skb;
334
335 skb = skb_dequeue(&ps->rx_queue);
336
337 if (skb)
338 mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR0_STS,
339 &ps->rx_queue);
340
341 skb = skb_dequeue(&ps->rx_queue2);
342 if (skb)
343 mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR1_STS,
344 &ps->rx_queue2);
345}
346
347static int is_pdelay_resp(u8 *msgtype)
348{
349 return (*msgtype & 0xf) == 3;
350}
351
352bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
353 struct sk_buff *skb, unsigned int type)
354{
355 struct mv88e6xxx_port_hwtstamp *ps;
356 struct mv88e6xxx_chip *chip;
357 u8 *hdr;
358
359 chip = ds->priv;
360 ps = &chip->port_hwtstamp[port];
361
362 if (ps->tstamp_config.rx_filter != HWTSTAMP_FILTER_PTP_V2_EVENT)
363 return false;
364
365 hdr = mv88e6xxx_should_tstamp(chip, port, skb, type);
366 if (!hdr)
367 return false;
368
369 SKB_PTP_TYPE(skb) = type;
370
371 if (is_pdelay_resp(hdr))
372 skb_queue_tail(&ps->rx_queue2, skb);
373 else
374 skb_queue_tail(&ps->rx_queue, skb);
375
376 ptp_schedule_worker(chip->ptp_clock, 0);
377
378 return true;
379}
380
381static int mv88e6xxx_txtstamp_work(struct mv88e6xxx_chip *chip,
382 struct mv88e6xxx_port_hwtstamp *ps)
383{
384 struct skb_shared_hwtstamps shhwtstamps;
385 u16 departure_block[4], status;
386 struct sk_buff *tmp_skb;
387 u32 time_raw;
388 int err;
389 u64 ns;
390
391 if (!ps->tx_skb)
392 return 0;
393
394 mutex_lock(&chip->reg_lock);
395 err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
396 MV88E6XXX_PORT_PTP_DEP_STS,
397 departure_block,
398 ARRAY_SIZE(departure_block));
399 mutex_unlock(&chip->reg_lock);
400
401 if (err)
402 goto free_and_clear_skb;
403
404 if (!(departure_block[0] & MV88E6XXX_PTP_TS_VALID)) {
405 if (time_is_before_jiffies(ps->tx_tstamp_start +
406 TX_TSTAMP_TIMEOUT)) {
407 dev_warn(chip->dev, "p%d: clearing tx timestamp hang\n",
408 ps->port_id);
409 goto free_and_clear_skb;
410 }
 411 /* The timestamp should be available quickly, and retrieving it
 412 * is high priority and time-bounded to only 10ms, so a poll
 413 * is warranted; restart the work.
 414 */
415 return 1;
416 }
417
418 /* We have the timestamp; go ahead and clear valid now */
419 mutex_lock(&chip->reg_lock);
420 mv88e6xxx_port_ptp_write(chip, ps->port_id,
421 MV88E6XXX_PORT_PTP_DEP_STS, 0);
422 mutex_unlock(&chip->reg_lock);
423
424 status = departure_block[0] & MV88E6XXX_PTP_TS_STATUS_MASK;
425 if (status != MV88E6XXX_PTP_TS_STATUS_NORMAL) {
426 dev_warn(chip->dev, "p%d: tx timestamp overrun\n", ps->port_id);
427 goto free_and_clear_skb;
428 }
429
430 if (departure_block[3] != ps->tx_seq_id) {
431 dev_warn(chip->dev, "p%d: unexpected seq. id\n", ps->port_id);
432 goto free_and_clear_skb;
433 }
434
435 memset(&shhwtstamps, 0, sizeof(shhwtstamps));
436 time_raw = ((u32)departure_block[2] << 16) | departure_block[1];
437 mutex_lock(&chip->reg_lock);
438 ns = timecounter_cyc2time(&chip->tstamp_tc, time_raw);
439 mutex_unlock(&chip->reg_lock);
440 shhwtstamps.hwtstamp = ns_to_ktime(ns);
441
442 dev_dbg(chip->dev,
443 "p%d: txtstamp %llx status 0x%04x skb ID 0x%04x hw ID 0x%04x\n",
444 ps->port_id, ktime_to_ns(shhwtstamps.hwtstamp),
445 departure_block[0], ps->tx_seq_id, departure_block[3]);
446
447 /* skb_complete_tx_timestamp() will free up the client to make
448 * another timestamp-able transmit. We have to be ready for it
449 * -- by clearing the ps->tx_skb "flag" -- beforehand.
450 */
451
452 tmp_skb = ps->tx_skb;
453 ps->tx_skb = NULL;
454 clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
455 skb_complete_tx_timestamp(tmp_skb, &shhwtstamps);
456
457 return 0;
458
459free_and_clear_skb:
460 dev_kfree_skb_any(ps->tx_skb);
461 ps->tx_skb = NULL;
462 clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
463
464 return 0;
465}
466
467long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
468{
469 struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
470 struct dsa_switch *ds = chip->ds;
471 struct mv88e6xxx_port_hwtstamp *ps;
472 int i, restart = 0;
473
474 for (i = 0; i < ds->num_ports; i++) {
475 if (!dsa_is_user_port(ds, i))
476 continue;
477
478 ps = &chip->port_hwtstamp[i];
479 if (test_bit(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state))
480 restart |= mv88e6xxx_txtstamp_work(chip, ps);
481
482 mv88e6xxx_rxtstamp_work(chip, ps);
483 }
484
485 return restart ? 1 : -1;
486}
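Being wired up as the ptp_clock_info do_aux_work callback, the return value here follows that interface's convention: a non-negative value is the delay in jiffies before the worker runs again, while a negative value parks the worker until ptp_schedule_worker() is called. Returning 1 therefore keeps polling while a TX timestamp is outstanding, and -1 idles the worker otherwise.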
487
488bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
489 struct sk_buff *clone, unsigned int type)
490{
491 struct mv88e6xxx_chip *chip = ds->priv;
492 struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
493 __be16 *seq_ptr;
494 u8 *hdr;
495
496 if (!(skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP))
497 return false;
498
499 hdr = mv88e6xxx_should_tstamp(chip, port, clone, type);
500 if (!hdr)
501 return false;
502
503 seq_ptr = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
504
505 if (test_and_set_bit_lock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
506 &ps->state))
507 return false;
508
509 ps->tx_skb = clone;
510 ps->tx_tstamp_start = jiffies;
511 ps->tx_seq_id = be16_to_cpup(seq_ptr);
512
513 ptp_schedule_worker(chip->ptp_clock, 0);
514 return true;
515}
516
517static int mv88e6xxx_hwtstamp_port_setup(struct mv88e6xxx_chip *chip, int port)
518{
519 struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
520
521 ps->port_id = port;
522
523 skb_queue_head_init(&ps->rx_queue);
524 skb_queue_head_init(&ps->rx_queue2);
525
526 return mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
527 MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP);
528}
529
530int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
531{
532 int err;
533 int i;
534
535 /* Disable timestamping on all ports. */
536 for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
537 err = mv88e6xxx_hwtstamp_port_setup(chip, i);
538 if (err)
539 return err;
540 }
541
 542 /* MV88E6XXX_PTP_MSGTYPE is a mask of PTP message types to
543 * timestamp. This affects all ports that have timestamping enabled,
544 * but the timestamp config is per-port; thus we configure all events
545 * here and only support the HWTSTAMP_FILTER_*_EVENT filter types.
546 */
547 err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_MSGTYPE,
548 MV88E6XXX_PTP_MSGTYPE_ALL_EVENT);
549 if (err)
550 return err;
551
552 /* Use ARRIVAL1 for peer delay response messages. */
553 err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_TS_ARRIVAL_PTR,
554 MV88E6XXX_PTP_MSGTYPE_PDLAY_RES);
555 if (err)
556 return err;
557
558 /* 88E6341 devices default to timestamping at the PHY, but this has
559 * a hardware issue that results in unreliable timestamps. Force
560 * these devices to timestamp at the MAC.
561 */
562 if (chip->info->family == MV88E6XXX_FAMILY_6341) {
563 u16 val = MV88E6341_PTP_CFG_UPDATE |
564 MV88E6341_PTP_CFG_MODE_IDX |
565 MV88E6341_PTP_CFG_MODE_TS_AT_MAC;
566 err = mv88e6xxx_ptp_write(chip, MV88E6341_PTP_CFG, val);
567 if (err)
568 return err;
569 }
570
571 return 0;
572}
573
574void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
575{
576}
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.h b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
new file mode 100644
index 000000000000..bc71c9212a08
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
@@ -0,0 +1,172 @@
1/*
2 * Marvell 88E6xxx Switch hardware timestamping support
3 *
4 * Copyright (c) 2008 Marvell Semiconductor
5 *
6 * Copyright (c) 2017 National Instruments
7 * Erik Hons <erik.hons@ni.com>
8 * Brandon Streiff <brandon.streiff@ni.com>
9 * Dane Wagner <dane.wagner@ni.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 */
16
17#ifndef _MV88E6XXX_HWTSTAMP_H
18#define _MV88E6XXX_HWTSTAMP_H
19
20#include "chip.h"
21
22/* Global PTP registers */
23/* Offset 0x00: PTP EtherType */
24#define MV88E6XXX_PTP_ETHERTYPE 0x00
25
26/* Offset 0x01: Message Type Timestamp Enables */
27#define MV88E6XXX_PTP_MSGTYPE 0x01
28#define MV88E6XXX_PTP_MSGTYPE_SYNC 0x0001
29#define MV88E6XXX_PTP_MSGTYPE_DELAY_REQ 0x0002
30#define MV88E6XXX_PTP_MSGTYPE_PDLAY_REQ 0x0004
31#define MV88E6XXX_PTP_MSGTYPE_PDLAY_RES 0x0008
32#define MV88E6XXX_PTP_MSGTYPE_ALL_EVENT 0x000f
33
34/* Offset 0x02: Timestamp Arrival Capture Pointers */
35#define MV88E6XXX_PTP_TS_ARRIVAL_PTR 0x02
36
37/* Offset 0x07: PTP Global Configuration */
38#define MV88E6341_PTP_CFG 0x07
39#define MV88E6341_PTP_CFG_UPDATE 0x8000
40#define MV88E6341_PTP_CFG_IDX_MASK 0x7f00
41#define MV88E6341_PTP_CFG_DATA_MASK 0x00ff
42#define MV88E6341_PTP_CFG_MODE_IDX 0x0
43#define MV88E6341_PTP_CFG_MODE_TS_AT_PHY 0x00
44#define MV88E6341_PTP_CFG_MODE_TS_AT_MAC 0x80
45
46/* Offset 0x08: PTP Interrupt Status */
47#define MV88E6XXX_PTP_IRQ_STATUS 0x08
48
49/* Per-Port PTP Registers */
50/* Offset 0x00: PTP Configuration 0 */
51#define MV88E6XXX_PORT_PTP_CFG0 0x00
52#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_SHIFT 12
53#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_MASK 0xf000
54#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_1588 0x0000
55#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_8021AS 0x1000
56#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH 0x0800
57#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_OVERWRITE 0x0002
58#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP 0x0001
59
60/* Offset 0x01: PTP Configuration 1 */
61#define MV88E6XXX_PORT_PTP_CFG1 0x01
62
63/* Offset 0x02: PTP Configuration 2 */
64#define MV88E6XXX_PORT_PTP_CFG2 0x02
65#define MV88E6XXX_PORT_PTP_CFG2_EMBED_ARRIVAL 0x1000
66#define MV88E6XXX_PORT_PTP_CFG2_DEP_IRQ_EN 0x0002
67#define MV88E6XXX_PORT_PTP_CFG2_ARR_IRQ_EN 0x0001
68
69/* Offset 0x03: PTP LED Configuration */
70#define MV88E6XXX_PORT_PTP_LED_CFG 0x03
71
72/* Offset 0x08: PTP Arrival 0 Status */
73#define MV88E6XXX_PORT_PTP_ARR0_STS 0x08
74
75/* Offset 0x09/0x0A: PTP Arrival 0 Time */
76#define MV88E6XXX_PORT_PTP_ARR0_TIME_LO 0x09
77#define MV88E6XXX_PORT_PTP_ARR0_TIME_HI 0x0a
78
79/* Offset 0x0B: PTP Arrival 0 Sequence ID */
80#define MV88E6XXX_PORT_PTP_ARR0_SEQID 0x0b
81
82/* Offset 0x0C: PTP Arrival 1 Status */
83#define MV88E6XXX_PORT_PTP_ARR1_STS 0x0c
84
85/* Offset 0x0D/0x0E: PTP Arrival 1 Time */
86#define MV88E6XXX_PORT_PTP_ARR1_TIME_LO 0x0d
87#define MV88E6XXX_PORT_PTP_ARR1_TIME_HI 0x0e
88
89/* Offset 0x0F: PTP Arrival 1 Sequence ID */
90#define MV88E6XXX_PORT_PTP_ARR1_SEQID 0x0f
91
92/* Offset 0x10: PTP Departure Status */
93#define MV88E6XXX_PORT_PTP_DEP_STS 0x10
94
 95/* Offset 0x11/0x12: PTP Departure Time */
96#define MV88E6XXX_PORT_PTP_DEP_TIME_LO 0x11
97#define MV88E6XXX_PORT_PTP_DEP_TIME_HI 0x12
98
99/* Offset 0x13: PTP Departure Sequence ID */
100#define MV88E6XXX_PORT_PTP_DEP_SEQID 0x13
101
 102/* Status fields for arrival and departure timestamp status registers */
103#define MV88E6XXX_PTP_TS_STATUS_MASK 0x0006
104#define MV88E6XXX_PTP_TS_STATUS_NORMAL 0x0000
 105#define MV88E6XXX_PTP_TS_STATUS_OVERWRITTEN	0x0002
106#define MV88E6XXX_PTP_TS_STATUS_DISCARDED 0x0004
107#define MV88E6XXX_PTP_TS_VALID 0x0001
108
109#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
110
111int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
112 struct ifreq *ifr);
113int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
114 struct ifreq *ifr);
115
116bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
117 struct sk_buff *clone, unsigned int type);
118bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
119 struct sk_buff *clone, unsigned int type);
120
121int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
122 struct ethtool_ts_info *info);
123
124int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip);
125void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip);
126
127#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
128
129static inline int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds,
130 int port, struct ifreq *ifr)
131{
132 return -EOPNOTSUPP;
133}
134
135static inline int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds,
136 int port, struct ifreq *ifr)
137{
138 return -EOPNOTSUPP;
139}
140
141static inline bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
142 struct sk_buff *clone,
143 unsigned int type)
144{
145 return false;
146}
147
148static inline bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
149 struct sk_buff *clone,
150 unsigned int type)
151{
152 return false;
153}
154
155static inline int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
156 struct ethtool_ts_info *info)
157{
158 return -EOPNOTSUPP;
159}
160
161static inline int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
162{
163 return 0;
164}
165
166static inline void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
167{
168}
169
170#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
171
172#endif /* _MV88E6XXX_HWTSTAMP_H */
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
new file mode 100644
index 000000000000..bd85e2c390e1
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/ptp.c
@@ -0,0 +1,381 @@
1/*
2 * Marvell 88E6xxx Switch PTP support
3 *
4 * Copyright (c) 2008 Marvell Semiconductor
5 *
6 * Copyright (c) 2017 National Instruments
7 * Erik Hons <erik.hons@ni.com>
8 * Brandon Streiff <brandon.streiff@ni.com>
9 * Dane Wagner <dane.wagner@ni.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 */
16
17#include "chip.h"
18#include "global2.h"
19#include "ptp.h"
20
21/* Raw timestamps are in units of 8-ns clock periods. */
22#define CC_SHIFT 28
23#define CC_MULT (8 << CC_SHIFT)
24#define CC_MULT_NUM (1 << 9)
25#define CC_MULT_DEM 15625ULL
26
27#define TAI_EVENT_WORK_INTERVAL msecs_to_jiffies(100)
28
29#define cc_to_chip(cc) container_of(cc, struct mv88e6xxx_chip, tstamp_cc)
30#define dw_overflow_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
31 overflow_work)
32#define dw_tai_event_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
33 tai_event_work)
34
35static int mv88e6xxx_tai_read(struct mv88e6xxx_chip *chip, int addr,
36 u16 *data, int len)
37{
38 if (!chip->info->ops->avb_ops->tai_read)
39 return -EOPNOTSUPP;
40
41 return chip->info->ops->avb_ops->tai_read(chip, addr, data, len);
42}
43
44static int mv88e6xxx_tai_write(struct mv88e6xxx_chip *chip, int addr, u16 data)
45{
46 if (!chip->info->ops->avb_ops->tai_write)
47 return -EOPNOTSUPP;
48
49 return chip->info->ops->avb_ops->tai_write(chip, addr, data);
50}
51
 52/* TODO: places where this is called should be using pinctrl */
53static int mv88e6xxx_set_gpio_func(struct mv88e6xxx_chip *chip, int pin,
54 int func, int input)
55{
56 int err;
57
58 if (!chip->info->ops->gpio_ops)
59 return -EOPNOTSUPP;
60
61 err = chip->info->ops->gpio_ops->set_dir(chip, pin, input);
62 if (err)
63 return err;
64
65 return chip->info->ops->gpio_ops->set_pctl(chip, pin, func);
66}
67
68static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc)
69{
70 struct mv88e6xxx_chip *chip = cc_to_chip(cc);
71 u16 phc_time[2];
72 int err;
73
74 err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_TIME_LO, phc_time,
75 ARRAY_SIZE(phc_time));
76 if (err)
77 return 0;
78 else
79 return ((u32)phc_time[1] << 16) | phc_time[0];
80}
81
82/* mv88e6xxx_config_eventcap - configure TAI event capture
83 * @event: PTP_CLOCK_PPS (internal) or PTP_CLOCK_EXTTS (external)
84 * @rising: zero for falling-edge trigger, else rising-edge trigger
85 *
86 * This will also reset the capture sequence counter.
87 */
88static int mv88e6xxx_config_eventcap(struct mv88e6xxx_chip *chip, int event,
89 int rising)
90{
91 u16 global_config;
92 u16 cap_config;
93 int err;
94
95 chip->evcap_config = MV88E6XXX_TAI_CFG_CAP_OVERWRITE |
96 MV88E6XXX_TAI_CFG_CAP_CTR_START;
97 if (!rising)
98 chip->evcap_config |= MV88E6XXX_TAI_CFG_EVREQ_FALLING;
99
100 global_config = (chip->evcap_config | chip->trig_config);
101 err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_CFG, global_config);
102 if (err)
103 return err;
104
105 if (event == PTP_CLOCK_PPS) {
106 cap_config = MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG;
107 } else if (event == PTP_CLOCK_EXTTS) {
108 /* if STATUS_CAP_TRIG is unset we capture PTP_EVREQ events */
109 cap_config = 0;
110 } else {
111 return -EINVAL;
112 }
113
114 /* Write the capture config; this also clears the capture counter */
115 err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS,
116 cap_config);
117
118 return err;
119}
120
121static void mv88e6xxx_tai_event_work(struct work_struct *ugly)
122{
123 struct delayed_work *dw = to_delayed_work(ugly);
124 struct mv88e6xxx_chip *chip = dw_tai_event_to_chip(dw);
125 struct ptp_clock_event ev;
126 u16 status[4];
127 u32 raw_ts;
128 int err;
129
130 mutex_lock(&chip->reg_lock);
131 err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_EVENT_STATUS,
132 status, ARRAY_SIZE(status));
133 mutex_unlock(&chip->reg_lock);
134
135 if (err) {
136 dev_err(chip->dev, "failed to read TAI status register\n");
137 return;
138 }
139 if (status[0] & MV88E6XXX_TAI_EVENT_STATUS_ERROR) {
140 dev_warn(chip->dev, "missed event capture\n");
141 return;
142 }
143 if (!(status[0] & MV88E6XXX_TAI_EVENT_STATUS_VALID))
144 goto out;
145
146 raw_ts = ((u32)status[2] << 16) | status[1];
147
148 /* Clear the valid bit so the next timestamp can come in */
149 status[0] &= ~MV88E6XXX_TAI_EVENT_STATUS_VALID;
150 mutex_lock(&chip->reg_lock);
151 err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS, status[0]);
152 mutex_unlock(&chip->reg_lock);
153
154 /* This is an external timestamp */
155 ev.type = PTP_CLOCK_EXTTS;
156
157 /* We only have one timestamping channel. */
158 ev.index = 0;
159 mutex_lock(&chip->reg_lock);
160 ev.timestamp = timecounter_cyc2time(&chip->tstamp_tc, raw_ts);
161 mutex_unlock(&chip->reg_lock);
162
163 ptp_clock_event(chip->ptp_clock, &ev);
164out:
165 schedule_delayed_work(&chip->tai_event_work, TAI_EVENT_WORK_INTERVAL);
166}
167
168static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
169{
170 struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
171 int neg_adj = 0;
172 u32 diff, mult;
173 u64 adj;
174
175 if (scaled_ppm < 0) {
176 neg_adj = 1;
177 scaled_ppm = -scaled_ppm;
178 }
179 mult = CC_MULT;
180 adj = CC_MULT_NUM;
181 adj *= scaled_ppm;
182 diff = div_u64(adj, CC_MULT_DEM);
183
184 mutex_lock(&chip->reg_lock);
185
186 timecounter_read(&chip->tstamp_tc);
187 chip->tstamp_cc.mult = neg_adj ? mult - diff : mult + diff;
188
189 mutex_unlock(&chip->reg_lock);
190
191 return 0;
192}
193
194static int mv88e6xxx_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
195{
196 struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
197
198 mutex_lock(&chip->reg_lock);
199 timecounter_adjtime(&chip->tstamp_tc, delta);
200 mutex_unlock(&chip->reg_lock);
201
202 return 0;
203}
204
205static int mv88e6xxx_ptp_gettime(struct ptp_clock_info *ptp,
206 struct timespec64 *ts)
207{
208 struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
209 u64 ns;
210
211 mutex_lock(&chip->reg_lock);
212 ns = timecounter_read(&chip->tstamp_tc);
213 mutex_unlock(&chip->reg_lock);
214
215 *ts = ns_to_timespec64(ns);
216
217 return 0;
218}
219
220static int mv88e6xxx_ptp_settime(struct ptp_clock_info *ptp,
221 const struct timespec64 *ts)
222{
223 struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
224 u64 ns;
225
226 ns = timespec64_to_ns(ts);
227
228 mutex_lock(&chip->reg_lock);
229 timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc, ns);
230 mutex_unlock(&chip->reg_lock);
231
232 return 0;
233}
234
235static int mv88e6xxx_ptp_enable_extts(struct mv88e6xxx_chip *chip,
236 struct ptp_clock_request *rq, int on)
237{
238 int rising = (rq->extts.flags & PTP_RISING_EDGE);
239 int func;
240 int pin;
241 int err;
242
243 pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index);
244
245 if (pin < 0)
246 return -EBUSY;
247
248 mutex_lock(&chip->reg_lock);
249
250 if (on) {
251 func = MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ;
252
253 err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
254 if (err)
255 goto out;
256
257 schedule_delayed_work(&chip->tai_event_work,
258 TAI_EVENT_WORK_INTERVAL);
259
260 err = mv88e6xxx_config_eventcap(chip, PTP_CLOCK_EXTTS, rising);
261 } else {
262 func = MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO;
263
264 err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
265
266 cancel_delayed_work_sync(&chip->tai_event_work);
267 }
268
269out:
270 mutex_unlock(&chip->reg_lock);
271
272 return err;
273}
274
275static int mv88e6xxx_ptp_enable(struct ptp_clock_info *ptp,
276 struct ptp_clock_request *rq, int on)
277{
278 struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
279
280 switch (rq->type) {
281 case PTP_CLK_REQ_EXTTS:
282 return mv88e6xxx_ptp_enable_extts(chip, rq, on);
283 default:
284 return -EOPNOTSUPP;
285 }
286}
287
288static int mv88e6xxx_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
289 enum ptp_pin_function func, unsigned int chan)
290{
291 switch (func) {
292 case PTP_PF_NONE:
293 case PTP_PF_EXTTS:
294 break;
295 case PTP_PF_PEROUT:
296 case PTP_PF_PHYSYNC:
297 return -EOPNOTSUPP;
298 }
299 return 0;
300}
301
302/* With a 125MHz input clock, the 32-bit timestamp counter overflows in ~34.3
303 * seconds; this task forces periodic reads so that we don't miss any.
304 */
305#define MV88E6XXX_TAI_OVERFLOW_PERIOD (HZ * 16)
306static void mv88e6xxx_ptp_overflow_check(struct work_struct *work)
307{
308 struct delayed_work *dw = to_delayed_work(work);
309 struct mv88e6xxx_chip *chip = dw_overflow_to_chip(dw);
310 struct timespec64 ts;
311
312 mv88e6xxx_ptp_gettime(&chip->ptp_clock_info, &ts);
313
314 schedule_delayed_work(&chip->overflow_work,
315 MV88E6XXX_TAI_OVERFLOW_PERIOD);
316}
317
318int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
319{
320 int i;
321
322 /* Set up the cycle counter */
323 memset(&chip->tstamp_cc, 0, sizeof(chip->tstamp_cc));
324 chip->tstamp_cc.read = mv88e6xxx_ptp_clock_read;
325 chip->tstamp_cc.mask = CYCLECOUNTER_MASK(32);
326 chip->tstamp_cc.mult = CC_MULT;
327 chip->tstamp_cc.shift = CC_SHIFT;
328
329 timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc,
330 ktime_to_ns(ktime_get_real()));
331
332 INIT_DELAYED_WORK(&chip->overflow_work, mv88e6xxx_ptp_overflow_check);
333 INIT_DELAYED_WORK(&chip->tai_event_work, mv88e6xxx_tai_event_work);
334
335 chip->ptp_clock_info.owner = THIS_MODULE;
336 snprintf(chip->ptp_clock_info.name, sizeof(chip->ptp_clock_info.name),
 337		 "%s", dev_name(chip->dev));
338 chip->ptp_clock_info.max_adj = 1000000;
339
340 chip->ptp_clock_info.n_ext_ts = 1;
341 chip->ptp_clock_info.n_per_out = 0;
342 chip->ptp_clock_info.n_pins = mv88e6xxx_num_gpio(chip);
343 chip->ptp_clock_info.pps = 0;
344
345 for (i = 0; i < chip->ptp_clock_info.n_pins; ++i) {
346 struct ptp_pin_desc *ppd = &chip->pin_config[i];
347
348 snprintf(ppd->name, sizeof(ppd->name), "mv88e6xxx_gpio%d", i);
349 ppd->index = i;
350 ppd->func = PTP_PF_NONE;
351 }
352 chip->ptp_clock_info.pin_config = chip->pin_config;
353
354 chip->ptp_clock_info.adjfine = mv88e6xxx_ptp_adjfine;
355 chip->ptp_clock_info.adjtime = mv88e6xxx_ptp_adjtime;
356 chip->ptp_clock_info.gettime64 = mv88e6xxx_ptp_gettime;
357 chip->ptp_clock_info.settime64 = mv88e6xxx_ptp_settime;
358 chip->ptp_clock_info.enable = mv88e6xxx_ptp_enable;
359 chip->ptp_clock_info.verify = mv88e6xxx_ptp_verify;
360 chip->ptp_clock_info.do_aux_work = mv88e6xxx_hwtstamp_work;
361
362 chip->ptp_clock = ptp_clock_register(&chip->ptp_clock_info, chip->dev);
363 if (IS_ERR(chip->ptp_clock))
364 return PTR_ERR(chip->ptp_clock);
365
366 schedule_delayed_work(&chip->overflow_work,
367 MV88E6XXX_TAI_OVERFLOW_PERIOD);
368
369 return 0;
370}
371
372void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
373{
374 if (chip->ptp_clock) {
375 cancel_delayed_work_sync(&chip->overflow_work);
376 cancel_delayed_work_sync(&chip->tai_event_work);
377
378 ptp_clock_unregister(chip->ptp_clock);
379 chip->ptp_clock = NULL;
380 }
381}
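
The cyclecounter constants at the top of this file are worth a sanity
check. With shift = 28 and mult = 8 << 28, timecounter_cyc2time()
yields ns = (cycles * mult) >> shift = cycles * 8, i.e. the 8 ns tick.
For adjfine, scaled_ppm carries ppm in units of 2^-16, so the mult
correction is mult * scaled_ppm / (10^6 * 2^16) = scaled_ppm * 2^9 /
15625, which is exactly CC_MULT_NUM / CC_MULT_DEM. A standalone sketch
of that arithmetic (illustrative only, not driver code):

	#include <stdint.h>
	#include <stdio.h>

	#define CC_SHIFT	28
	#define CC_MULT		(8u << CC_SHIFT)	/* 8 ns per cycle */
	#define CC_MULT_NUM	(1u << 9)
	#define CC_MULT_DEM	15625ULL

	int main(void)
	{
		uint64_t cycles = 125000000;	/* one second of 8 ns ticks */
		uint64_t ns = (cycles * CC_MULT) >> CC_SHIFT;

		long scaled_ppm = 1 << 16;	/* request a +1 ppm rate change */
		uint32_t diff = (uint32_t)(CC_MULT_NUM * (uint64_t)scaled_ppm /
					   CC_MULT_DEM);

		printf("%llu ns\n", (unsigned long long)ns);	/* 1000000000 */
		/* both print 2147: diff is one millionth of mult */
		printf("diff=%u, mult/10^6=%u\n", diff, CC_MULT / 1000000);
		return 0;
	}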
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.h b/drivers/net/dsa/mv88e6xxx/ptp.h
new file mode 100644
index 000000000000..992818ade746
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/ptp.h
@@ -0,0 +1,108 @@
1/*
2 * Marvell 88E6xxx Switch PTP support
3 *
4 * Copyright (c) 2008 Marvell Semiconductor
5 *
6 * Copyright (c) 2017 National Instruments
7 * Erik Hons <erik.hons@ni.com>
8 * Brandon Streiff <brandon.streiff@ni.com>
9 * Dane Wagner <dane.wagner@ni.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 */
16
17#ifndef _MV88E6XXX_PTP_H
18#define _MV88E6XXX_PTP_H
19
20#include "chip.h"
21
22/* Offset 0x00: TAI Global Config */
23#define MV88E6XXX_TAI_CFG 0x00
24#define MV88E6XXX_TAI_CFG_CAP_OVERWRITE 0x8000
25#define MV88E6XXX_TAI_CFG_CAP_CTR_START 0x4000
26#define MV88E6XXX_TAI_CFG_EVREQ_FALLING 0x2000
27#define MV88E6XXX_TAI_CFG_TRIG_ACTIVE_LO 0x1000
28#define MV88E6XXX_TAI_CFG_IRL_ENABLE 0x0400
29#define MV88E6XXX_TAI_CFG_TRIG_IRQ_EN 0x0200
30#define MV88E6XXX_TAI_CFG_EVREQ_IRQ_EN 0x0100
31#define MV88E6XXX_TAI_CFG_TRIG_LOCK 0x0080
32#define MV88E6XXX_TAI_CFG_BLOCK_UPDATE 0x0008
33#define MV88E6XXX_TAI_CFG_MULTI_PTP 0x0004
34#define MV88E6XXX_TAI_CFG_TRIG_MODE_ONESHOT 0x0002
35#define MV88E6XXX_TAI_CFG_TRIG_ENABLE 0x0001
36
37/* Offset 0x01: Timestamp Clock Period (ps) */
38#define MV88E6XXX_TAI_CLOCK_PERIOD 0x01
39
40/* Offset 0x02/0x03: Trigger Generation Amount */
41#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_LO 0x02
42#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_HI 0x03
43
44/* Offset 0x04: Clock Compensation */
45#define MV88E6XXX_TAI_TRIG_CLOCK_COMP 0x04
46
47/* Offset 0x05: Trigger Configuration */
48#define MV88E6XXX_TAI_TRIG_CFG 0x05
49
50/* Offset 0x06: Ingress Rate Limiter Clock Generation Amount */
51#define MV88E6XXX_TAI_IRL_AMOUNT 0x06
52
53/* Offset 0x07: Ingress Rate Limiter Compensation */
54#define MV88E6XXX_TAI_IRL_COMP 0x07
55
56/* Offset 0x08: Ingress Rate Limiter Compensation */
57#define MV88E6XXX_TAI_IRL_COMP_PS 0x08
58
59/* Offset 0x09: Event Status */
60#define MV88E6XXX_TAI_EVENT_STATUS 0x09
61#define MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG 0x4000
62#define MV88E6XXX_TAI_EVENT_STATUS_ERROR 0x0200
63#define MV88E6XXX_TAI_EVENT_STATUS_VALID 0x0100
64#define MV88E6XXX_TAI_EVENT_STATUS_CTR_MASK 0x00ff
65
66/* Offset 0x0A/0x0B: Event Time */
67#define MV88E6XXX_TAI_EVENT_TIME_LO 0x0a
 68#define MV88E6XXX_TAI_EVENT_TIME_HI		0x0b
69
70/* Offset 0x0E/0x0F: PTP Global Time */
71#define MV88E6XXX_TAI_TIME_LO 0x0e
72#define MV88E6XXX_TAI_TIME_HI 0x0f
73
74/* Offset 0x10/0x11: Trig Generation Time */
75#define MV88E6XXX_TAI_TRIG_TIME_LO 0x10
76#define MV88E6XXX_TAI_TRIG_TIME_HI 0x11
77
78/* Offset 0x12: Lock Status */
79#define MV88E6XXX_TAI_LOCK_STATUS 0x12
80
81#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
82
83long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp);
84int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip);
85void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip);
86
87#define ptp_to_chip(ptp) container_of(ptp, struct mv88e6xxx_chip, \
88 ptp_clock_info)
89
90#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
91
 92static inline long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
93{
94 return -1;
95}
96
97static inline int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
98{
99 return 0;
100}
101
 102static inline void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
103{
104}
105
106#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
107
108#endif /* _MV88E6XXX_PTP_H */
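
This header closes the loop on the deferred timestamp processing in
hwtstamp.c: mv88e6xxx_hwtstamp_work() is installed as
ptp_clock_info.do_aux_work, and ptp_schedule_worker() kicks the PTP
core's kthread to run it. The contract is that do_aux_work returns the
delay in jiffies until the next automatic invocation, or a negative
value to stay idle until ptp_schedule_worker() is called again (hence
the stub's return -1). A sketch of a minimal handler, where
poll_pending_timestamps() is a hypothetical stand-in:

	#include <linux/jiffies.h>
	#include <linux/ptp_clock_kernel.h>

	static long example_aux_work(struct ptp_clock_info *ptp)
	{
		/* poll_pending_timestamps() is hypothetical: returns true
		 * while deferred timestamps may still arrive.
		 */
		bool more = poll_pending_timestamps(ptp);

		return more ? msecs_to_jiffies(100) : -1;
	}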
diff --git a/drivers/net/ethernet/8390/Makefile b/drivers/net/ethernet/8390/Makefile
index f975c2fc88a3..1d650e66cc6e 100644
--- a/drivers/net/ethernet/8390/Makefile
+++ b/drivers/net/ethernet/8390/Makefile
@@ -7,8 +7,8 @@ obj-$(CONFIG_MAC8390) += mac8390.o
7obj-$(CONFIG_APNE) += apne.o 8390.o 7obj-$(CONFIG_APNE) += apne.o 8390.o
8obj-$(CONFIG_ARM_ETHERH) += etherh.o 8obj-$(CONFIG_ARM_ETHERH) += etherh.o
9obj-$(CONFIG_AX88796) += ax88796.o 9obj-$(CONFIG_AX88796) += ax88796.o
10obj-$(CONFIG_HYDRA) += hydra.o 8390.o 10obj-$(CONFIG_HYDRA) += hydra.o
11obj-$(CONFIG_MCF8390) += mcf8390.o 8390.o 11obj-$(CONFIG_MCF8390) += mcf8390.o
12obj-$(CONFIG_NE2000) += ne.o 8390p.o 12obj-$(CONFIG_NE2000) += ne.o 8390p.o
13obj-$(CONFIG_NE2K_PCI) += ne2k-pci.o 8390.o 13obj-$(CONFIG_NE2K_PCI) += ne2k-pci.o 8390.o
14obj-$(CONFIG_PCMCIA_AXNET) += axnet_cs.o 8390.o 14obj-$(CONFIG_PCMCIA_AXNET) += axnet_cs.o 8390.o
@@ -16,4 +16,4 @@ obj-$(CONFIG_PCMCIA_PCNET) += pcnet_cs.o 8390.o
16obj-$(CONFIG_STNIC) += stnic.o 8390.o 16obj-$(CONFIG_STNIC) += stnic.o 8390.o
17obj-$(CONFIG_ULTRA) += smc-ultra.o 8390.o 17obj-$(CONFIG_ULTRA) += smc-ultra.o 8390.o
18obj-$(CONFIG_WD80x3) += wd.o 8390.o 18obj-$(CONFIG_WD80x3) += wd.o 8390.o
19obj-$(CONFIG_ZORRO8390) += zorro8390.o 8390.o 19obj-$(CONFIG_ZORRO8390) += zorro8390.o
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 245554707163..da61cf3cb3a9 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -77,8 +77,6 @@ static unsigned char version[] = "ax88796.c: Copyright 2005,2007 Simtec Electron
77 77
78#define AX_GPOC_PPDSET BIT(6) 78#define AX_GPOC_PPDSET BIT(6)
79 79
80static u32 ax_msg_enable;
81
82/* device private data */ 80/* device private data */
83 81
84struct ax_device { 82struct ax_device {
@@ -747,7 +745,6 @@ static int ax_init_dev(struct net_device *dev)
747 ei_local->block_output = &ax_block_output; 745 ei_local->block_output = &ax_block_output;
748 ei_local->get_8390_hdr = &ax_get_8390_hdr; 746 ei_local->get_8390_hdr = &ax_get_8390_hdr;
749 ei_local->priv = 0; 747 ei_local->priv = 0;
750 ei_local->msg_enable = ax_msg_enable;
751 748
752 dev->netdev_ops = &ax_netdev_ops; 749 dev->netdev_ops = &ax_netdev_ops;
753 dev->ethtool_ops = &ax_ethtool_ops; 750 dev->ethtool_ops = &ax_ethtool_ops;
diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c
index 7bddb8efb6d5..d422a124cd7c 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -104,7 +104,6 @@ static void AX88190_init(struct net_device *dev, int startp);
104static int ax_open(struct net_device *dev); 104static int ax_open(struct net_device *dev);
105static int ax_close(struct net_device *dev); 105static int ax_close(struct net_device *dev);
106static irqreturn_t ax_interrupt(int irq, void *dev_id); 106static irqreturn_t ax_interrupt(int irq, void *dev_id);
107static u32 axnet_msg_enable;
108 107
109/*====================================================================*/ 108/*====================================================================*/
110 109
@@ -151,7 +150,6 @@ static int axnet_probe(struct pcmcia_device *link)
151 return -ENOMEM; 150 return -ENOMEM;
152 151
153 ei_local = netdev_priv(dev); 152 ei_local = netdev_priv(dev);
154 ei_local->msg_enable = axnet_msg_enable;
155 spin_lock_init(&ei_local->page_lock); 153 spin_lock_init(&ei_local->page_lock);
156 154
157 info = PRIV(dev); 155 info = PRIV(dev);
diff --git a/drivers/net/ethernet/8390/etherh.c b/drivers/net/ethernet/8390/etherh.c
index 11cbf22ad201..32e9627e3880 100644
--- a/drivers/net/ethernet/8390/etherh.c
+++ b/drivers/net/ethernet/8390/etherh.c
@@ -64,8 +64,6 @@ static char version[] =
64 64
65#include "lib8390.c" 65#include "lib8390.c"
66 66
67static u32 etherh_msg_enable;
68
69struct etherh_priv { 67struct etherh_priv {
70 void __iomem *ioc_fast; 68 void __iomem *ioc_fast;
71 void __iomem *memc; 69 void __iomem *memc;
@@ -502,18 +500,6 @@ etherh_close(struct net_device *dev)
502} 500}
503 501
504/* 502/*
505 * Initialisation
506 */
507
508static void __init etherh_banner(void)
509{
510 static int version_printed;
511
512 if ((etherh_msg_enable & NETIF_MSG_DRV) && (version_printed++ == 0))
513 pr_info("%s", version);
514}
515
516/*
517 * Read the ethernet address string from the on board rom. 503 * Read the ethernet address string from the on board rom.
518 * This is an ascii string... 504 * This is an ascii string...
519 */ 505 */
@@ -671,8 +657,6 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
671 struct etherh_priv *eh; 657 struct etherh_priv *eh;
672 int ret; 658 int ret;
673 659
674 etherh_banner();
675
676 ret = ecard_request_resources(ec); 660 ret = ecard_request_resources(ec);
677 if (ret) 661 if (ret)
678 goto out; 662 goto out;
@@ -757,7 +741,6 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
757 ei_local->block_output = etherh_block_output; 741 ei_local->block_output = etherh_block_output;
758 ei_local->get_8390_hdr = etherh_get_header; 742 ei_local->get_8390_hdr = etherh_get_header;
759 ei_local->interface_num = 0; 743 ei_local->interface_num = 0;
760 ei_local->msg_enable = etherh_msg_enable;
761 744
762 etherh_reset(dev); 745 etherh_reset(dev);
763 __NS8390_init(dev, 0); 746 __NS8390_init(dev, 0);
diff --git a/drivers/net/ethernet/8390/hydra.c b/drivers/net/ethernet/8390/hydra.c
index 8ae249195301..941754ea78ec 100644
--- a/drivers/net/ethernet/8390/hydra.c
+++ b/drivers/net/ethernet/8390/hydra.c
@@ -66,7 +66,6 @@ static void hydra_block_input(struct net_device *dev, int count,
66static void hydra_block_output(struct net_device *dev, int count, 66static void hydra_block_output(struct net_device *dev, int count,
67 const unsigned char *buf, int start_page); 67 const unsigned char *buf, int start_page);
68static void hydra_remove_one(struct zorro_dev *z); 68static void hydra_remove_one(struct zorro_dev *z);
69static u32 hydra_msg_enable;
70 69
71static struct zorro_device_id hydra_zorro_tbl[] = { 70static struct zorro_device_id hydra_zorro_tbl[] = {
72 { ZORRO_PROD_HYDRA_SYSTEMS_AMIGANET }, 71 { ZORRO_PROD_HYDRA_SYSTEMS_AMIGANET },
@@ -119,7 +118,6 @@ static int hydra_init(struct zorro_dev *z)
119 int start_page, stop_page; 118 int start_page, stop_page;
120 int j; 119 int j;
121 int err; 120 int err;
122 struct ei_device *ei_local;
123 121
124 static u32 hydra_offsets[16] = { 122 static u32 hydra_offsets[16] = {
125 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 123 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
@@ -138,8 +136,6 @@ static int hydra_init(struct zorro_dev *z)
138 start_page = NESM_START_PG; 136 start_page = NESM_START_PG;
139 stop_page = NESM_STOP_PG; 137 stop_page = NESM_STOP_PG;
140 138
141 ei_local = netdev_priv(dev);
142 ei_local->msg_enable = hydra_msg_enable;
143 dev->base_addr = ioaddr; 139 dev->base_addr = ioaddr;
144 dev->irq = IRQ_AMIGA_PORTS; 140 dev->irq = IRQ_AMIGA_PORTS;
145 141
diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c
index 60f8e2c8e726..5d9bbde9fe68 100644
--- a/drivers/net/ethernet/8390/lib8390.c
+++ b/drivers/net/ethernet/8390/lib8390.c
@@ -975,6 +975,8 @@ static void ethdev_setup(struct net_device *dev)
975 ether_setup(dev); 975 ether_setup(dev);
976 976
977 spin_lock_init(&ei_local->page_lock); 977 spin_lock_init(&ei_local->page_lock);
978
979 ei_local->msg_enable = msg_enable;
978} 980}
979 981
980/** 982/**
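
This two-line hunk is the other half of the msg_enable cleanup running
through the 8390 drivers above and below: rather than each driver
carrying its own (always zero) *_msg_enable static, ethdev_setup() now
applies one shared value to every device it initializes. The usual
module-parameter wiring for such a value looks like the sketch below;
the actual definition sits elsewhere in lib8390.c, so treat the name
and default as illustrative:

	static u32 msg_enable;
	module_param(msg_enable, uint, 0444);
	MODULE_PARM_DESC(msg_enable,
			 "Debug message level (see linux/netdevice.h NETIF_MSG_* bits)");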
diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c
index 2f91ce8dc614..b6d735bf8011 100644
--- a/drivers/net/ethernet/8390/mac8390.c
+++ b/drivers/net/ethernet/8390/mac8390.c
@@ -123,8 +123,7 @@ enum mac8390_access {
123}; 123};
124 124
125extern int mac8390_memtest(struct net_device *dev); 125extern int mac8390_memtest(struct net_device *dev);
126static int mac8390_initdev(struct net_device *dev, 126static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
127 struct nubus_rsrc *ndev,
128 enum mac8390_type type); 127 enum mac8390_type type);
129 128
130static int mac8390_open(struct net_device *dev); 129static int mac8390_open(struct net_device *dev);
@@ -168,9 +167,8 @@ static void slow_sane_block_output(struct net_device *dev, int count,
168 const unsigned char *buf, int start_page); 167 const unsigned char *buf, int start_page);
169static void word_memcpy_tocard(unsigned long tp, const void *fp, int count); 168static void word_memcpy_tocard(unsigned long tp, const void *fp, int count);
170static void word_memcpy_fromcard(void *tp, unsigned long fp, int count); 169static void word_memcpy_fromcard(void *tp, unsigned long fp, int count);
171static u32 mac8390_msg_enable;
172 170
173static enum mac8390_type __init mac8390_ident(struct nubus_rsrc *fres) 171static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres)
174{ 172{
175 switch (fres->dr_sw) { 173 switch (fres->dr_sw) {
176 case NUBUS_DRSW_3COM: 174 case NUBUS_DRSW_3COM:
@@ -236,7 +234,7 @@ static enum mac8390_type __init mac8390_ident(struct nubus_rsrc *fres)
236 return MAC8390_NONE; 234 return MAC8390_NONE;
237} 235}
238 236
239static enum mac8390_access __init mac8390_testio(volatile unsigned long membase) 237static enum mac8390_access mac8390_testio(unsigned long membase)
240{ 238{
241 unsigned long outdata = 0xA5A0B5B0; 239 unsigned long outdata = 0xA5A0B5B0;
242 unsigned long indata = 0x00000000; 240 unsigned long indata = 0x00000000;
@@ -254,7 +252,7 @@ static enum mac8390_access __init mac8390_testio(volatile unsigned long membase)
254 return ACCESS_UNKNOWN; 252 return ACCESS_UNKNOWN;
255} 253}
256 254
257static int __init mac8390_memsize(unsigned long membase) 255static int mac8390_memsize(unsigned long membase)
258{ 256{
259 unsigned long flags; 257 unsigned long flags;
260 int i, j; 258 int i, j;
@@ -290,36 +288,34 @@ static int __init mac8390_memsize(unsigned long membase)
290 return i * 0x1000; 288 return i * 0x1000;
291} 289}
292 290
293static bool __init mac8390_init(struct net_device *dev, 291static bool mac8390_rsrc_init(struct net_device *dev,
294 struct nubus_rsrc *ndev, 292 struct nubus_rsrc *fres,
295 enum mac8390_type cardtype) 293 enum mac8390_type cardtype)
296{ 294{
295 struct nubus_board *board = fres->board;
297 struct nubus_dir dir; 296 struct nubus_dir dir;
298 struct nubus_dirent ent; 297 struct nubus_dirent ent;
299 int offset; 298 int offset;
300 volatile unsigned short *i; 299 volatile unsigned short *i;
301 300
302 printk_once(KERN_INFO pr_fmt("%s"), version); 301 dev->irq = SLOT2IRQ(board->slot);
303
304 dev->irq = SLOT2IRQ(ndev->board->slot);
305 /* This is getting to be a habit */ 302 /* This is getting to be a habit */
306 dev->base_addr = (ndev->board->slot_addr | 303 dev->base_addr = board->slot_addr | ((board->slot & 0xf) << 20);
307 ((ndev->board->slot & 0xf) << 20));
308 304
309 /* 305 /*
310 * Get some Nubus info - we will trust the card's idea 306 * Get some Nubus info - we will trust the card's idea
311 * of where its memory and registers are. 307 * of where its memory and registers are.
312 */ 308 */
313 309
314 if (nubus_get_func_dir(ndev, &dir) == -1) { 310 if (nubus_get_func_dir(fres, &dir) == -1) {
315 pr_err("%s: Unable to get Nubus functional directory for slot %X!\n", 311 dev_err(&board->dev,
316 dev->name, ndev->board->slot); 312 "Unable to get Nubus functional directory\n");
317 return false; 313 return false;
318 } 314 }
319 315
320 /* Get the MAC address */ 316 /* Get the MAC address */
321 if (nubus_find_rsrc(&dir, NUBUS_RESID_MAC_ADDRESS, &ent) == -1) { 317 if (nubus_find_rsrc(&dir, NUBUS_RESID_MAC_ADDRESS, &ent) == -1) {
322 pr_info("%s: Couldn't get MAC address!\n", dev->name); 318 dev_info(&board->dev, "MAC address resource not found\n");
323 return false; 319 return false;
324 } 320 }
325 321
@@ -329,8 +325,8 @@ static bool __init mac8390_init(struct net_device *dev,
329 nubus_rewinddir(&dir); 325 nubus_rewinddir(&dir);
330 if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_BASEOS, 326 if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_BASEOS,
331 &ent) == -1) { 327 &ent) == -1) {
332 pr_err("%s: Memory offset resource for slot %X not found!\n", 328 dev_err(&board->dev,
333 dev->name, ndev->board->slot); 329 "Memory offset resource not found\n");
334 return false; 330 return false;
335 } 331 }
336 nubus_get_rsrc_mem(&offset, &ent, 4); 332 nubus_get_rsrc_mem(&offset, &ent, 4);
@@ -340,8 +336,8 @@ static bool __init mac8390_init(struct net_device *dev,
340 nubus_rewinddir(&dir); 336 nubus_rewinddir(&dir);
341 if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_LENGTH, 337 if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_LENGTH,
342 &ent) == -1) { 338 &ent) == -1) {
343 pr_info("%s: Memory length resource for slot %X not found, probing\n", 339 dev_info(&board->dev,
344 dev->name, ndev->board->slot); 340 "Memory length resource not found, probing\n");
345 offset = mac8390_memsize(dev->mem_start); 341 offset = mac8390_memsize(dev->mem_start);
346 } else { 342 } else {
347 nubus_get_rsrc_mem(&offset, &ent, 4); 343 nubus_get_rsrc_mem(&offset, &ent, 4);
@@ -351,25 +347,25 @@ static bool __init mac8390_init(struct net_device *dev,
351 switch (cardtype) { 347 switch (cardtype) {
352 case MAC8390_KINETICS: 348 case MAC8390_KINETICS:
353 case MAC8390_DAYNA: /* it's the same */ 349 case MAC8390_DAYNA: /* it's the same */
354 dev->base_addr = (int)(ndev->board->slot_addr + 350 dev->base_addr = (int)(board->slot_addr +
355 DAYNA_8390_BASE); 351 DAYNA_8390_BASE);
356 dev->mem_start = (int)(ndev->board->slot_addr + 352 dev->mem_start = (int)(board->slot_addr +
357 DAYNA_8390_MEM); 353 DAYNA_8390_MEM);
358 dev->mem_end = dev->mem_start + 354 dev->mem_end = dev->mem_start +
359 mac8390_memsize(dev->mem_start); 355 mac8390_memsize(dev->mem_start);
360 break; 356 break;
361 case MAC8390_INTERLAN: 357 case MAC8390_INTERLAN:
362 dev->base_addr = (int)(ndev->board->slot_addr + 358 dev->base_addr = (int)(board->slot_addr +
363 INTERLAN_8390_BASE); 359 INTERLAN_8390_BASE);
364 dev->mem_start = (int)(ndev->board->slot_addr + 360 dev->mem_start = (int)(board->slot_addr +
365 INTERLAN_8390_MEM); 361 INTERLAN_8390_MEM);
366 dev->mem_end = dev->mem_start + 362 dev->mem_end = dev->mem_start +
367 mac8390_memsize(dev->mem_start); 363 mac8390_memsize(dev->mem_start);
368 break; 364 break;
369 case MAC8390_CABLETRON: 365 case MAC8390_CABLETRON:
370 dev->base_addr = (int)(ndev->board->slot_addr + 366 dev->base_addr = (int)(board->slot_addr +
371 CABLETRON_8390_BASE); 367 CABLETRON_8390_BASE);
372 dev->mem_start = (int)(ndev->board->slot_addr + 368 dev->mem_start = (int)(board->slot_addr +
373 CABLETRON_8390_MEM); 369 CABLETRON_8390_MEM);
374 /* The base address is unreadable if 0x00 370 /* The base address is unreadable if 0x00
375 * has been written to the command register 371 * has been written to the command register
@@ -384,8 +380,8 @@ static bool __init mac8390_init(struct net_device *dev,
384 break; 380 break;
385 381
386 default: 382 default:
387 pr_err("Card type %s is unsupported, sorry\n", 383 dev_err(&board->dev,
388 ndev->board->name); 384 "No known base address for card type\n");
389 return false; 385 return false;
390 } 386 }
391 } 387 }
@@ -393,91 +389,83 @@ static bool __init mac8390_init(struct net_device *dev,
393 return true; 389 return true;
394} 390}
395 391
396struct net_device * __init mac8390_probe(int unit) 392static int mac8390_device_probe(struct nubus_board *board)
397{ 393{
398 struct net_device *dev; 394 struct net_device *dev;
399 struct nubus_rsrc *ndev = NULL;
400 int err = -ENODEV; 395 int err = -ENODEV;
401 struct ei_device *ei_local; 396 struct nubus_rsrc *fres;
402 397 enum mac8390_type cardtype = MAC8390_NONE;
403 static unsigned int slots;
404
405 enum mac8390_type cardtype;
406
407 /* probably should check for Nubus instead */
408
409 if (!MACH_IS_MAC)
410 return ERR_PTR(-ENODEV);
411 398
412 dev = ____alloc_ei_netdev(0); 399 dev = ____alloc_ei_netdev(0);
413 if (!dev) 400 if (!dev)
414 return ERR_PTR(-ENOMEM); 401 return -ENOMEM;
415
416 if (unit >= 0)
417 sprintf(dev->name, "eth%d", unit);
418 402
419 for_each_func_rsrc(ndev) { 403 SET_NETDEV_DEV(dev, &board->dev);
420 if (ndev->category != NUBUS_CAT_NETWORK ||
421 ndev->type != NUBUS_TYPE_ETHERNET)
422 continue;
423 404
424 /* Have we seen it already? */ 405 for_each_board_func_rsrc(board, fres) {
425 if (slots & (1 << ndev->board->slot)) 406 if (fres->category != NUBUS_CAT_NETWORK ||
407 fres->type != NUBUS_TYPE_ETHERNET)
426 continue; 408 continue;
427 slots |= 1 << ndev->board->slot;
428 409
429 cardtype = mac8390_ident(ndev); 410 cardtype = mac8390_ident(fres);
430 if (cardtype == MAC8390_NONE) 411 if (cardtype == MAC8390_NONE)
431 continue; 412 continue;
432 413
433 if (!mac8390_init(dev, ndev, cardtype)) 414 if (mac8390_rsrc_init(dev, fres, cardtype))
434 continue;
435
436 /* Do the nasty 8390 stuff */
437 if (!mac8390_initdev(dev, ndev, cardtype))
438 break; 415 break;
439 } 416 }
440 417 if (!fres)
441 if (!ndev)
442 goto out; 418 goto out;
443 419
444 ei_local = netdev_priv(dev); 420 err = mac8390_initdev(dev, board, cardtype);
445 ei_local->msg_enable = mac8390_msg_enable; 421 if (err)
422 goto out;
446 423
447 err = register_netdev(dev); 424 err = register_netdev(dev);
448 if (err) 425 if (err)
449 goto out; 426 goto out;
450 return dev; 427
428 nubus_set_drvdata(board, dev);
429 return 0;
451 430
452out: 431out:
453 free_netdev(dev); 432 free_netdev(dev);
454 return ERR_PTR(err); 433 return err;
434}
435
436static int mac8390_device_remove(struct nubus_board *board)
437{
438 struct net_device *dev = nubus_get_drvdata(board);
439
440 unregister_netdev(dev);
441 free_netdev(dev);
442 return 0;
455} 443}
456 444
457#ifdef MODULE 445static struct nubus_driver mac8390_driver = {
446 .probe = mac8390_device_probe,
447 .remove = mac8390_device_remove,
448 .driver = {
449 .name = KBUILD_MODNAME,
450 .owner = THIS_MODULE,
451 }
452};
453
458MODULE_AUTHOR("David Huggins-Daines <dhd@debian.org> and others"); 454MODULE_AUTHOR("David Huggins-Daines <dhd@debian.org> and others");
459MODULE_DESCRIPTION("Macintosh NS8390-based Nubus Ethernet driver"); 455MODULE_DESCRIPTION("Macintosh NS8390-based Nubus Ethernet driver");
460MODULE_LICENSE("GPL"); 456MODULE_LICENSE("GPL");
461 457
462static struct net_device *dev_mac8390; 458static int __init mac8390_init(void)
463
464int __init init_module(void)
465{ 459{
466 dev_mac8390 = mac8390_probe(-1); 460 return nubus_driver_register(&mac8390_driver);
467 if (IS_ERR(dev_mac8390)) {
468 pr_warn("mac8390: No card found\n");
469 return PTR_ERR(dev_mac8390);
470 }
471 return 0;
472} 461}
462module_init(mac8390_init);
473 463
474void __exit cleanup_module(void) 464static void __exit mac8390_exit(void)
475{ 465{
476 unregister_netdev(dev_mac8390); 466 nubus_driver_unregister(&mac8390_driver);
477 free_netdev(dev_mac8390);
478} 467}
479 468module_exit(mac8390_exit);
480#endif /* MODULE */
481 469
482static const struct net_device_ops mac8390_netdev_ops = { 470static const struct net_device_ops mac8390_netdev_ops = {
483 .ndo_open = mac8390_open, 471 .ndo_open = mac8390_open,
@@ -493,9 +481,8 @@ static const struct net_device_ops mac8390_netdev_ops = {
493#endif 481#endif
494}; 482};
495 483
496static int __init mac8390_initdev(struct net_device *dev, 484static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
497 struct nubus_rsrc *ndev, 485 enum mac8390_type type)
498 enum mac8390_type type)
499{ 486{
500 static u32 fwrd4_offsets[16] = { 487 static u32 fwrd4_offsets[16] = {
501 0, 4, 8, 12, 488 0, 4, 8, 12,
@@ -546,7 +533,8 @@ static int __init mac8390_initdev(struct net_device *dev,
546 case MAC8390_APPLE: 533 case MAC8390_APPLE:
547 switch (mac8390_testio(dev->mem_start)) { 534 switch (mac8390_testio(dev->mem_start)) {
548 case ACCESS_UNKNOWN: 535 case ACCESS_UNKNOWN:
549 pr_err("Don't know how to access card memory!\n"); 536 dev_err(&board->dev,
537 "Don't know how to access card memory\n");
550 return -ENODEV; 538 return -ENODEV;
551 539
552 case ACCESS_16: 540 case ACCESS_16:
@@ -612,21 +600,18 @@ static int __init mac8390_initdev(struct net_device *dev,
612 break; 600 break;
613 601
614 default: 602 default:
615 pr_err("Card type %s is unsupported, sorry\n", 603 dev_err(&board->dev, "Unsupported card type\n");
616 ndev->board->name);
617 return -ENODEV; 604 return -ENODEV;
618 } 605 }
619 606
620 __NS8390_init(dev, 0); 607 __NS8390_init(dev, 0);
621 608
622 /* Good, done, now spit out some messages */ 609 /* Good, done, now spit out some messages */
623 pr_info("%s: %s in slot %X (type %s)\n", 610 dev_info(&board->dev, "%s (type %s)\n", board->name, cardname[type]);
624 dev->name, ndev->board->name, ndev->board->slot, 611 dev_info(&board->dev, "MAC %pM, IRQ %d, %d KB shared memory at %#lx, %d-bit access.\n",
625 cardname[type]); 612 dev->dev_addr, dev->irq,
626 pr_info("MAC %pM IRQ %d, %d KB shared memory at %#lx, %d-bit access.\n", 613 (unsigned int)(dev->mem_end - dev->mem_start) >> 10,
627 dev->dev_addr, dev->irq, 614 dev->mem_start, access_bitmode ? 32 : 16);
628 (unsigned int)(dev->mem_end - dev->mem_start) >> 10,
629 dev->mem_start, access_bitmode ? 32 : 16);
630 return 0; 615 return 0;
631} 616}
632 617
diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c
index 4bb967bc879e..4ad8031ab669 100644
--- a/drivers/net/ethernet/8390/mcf8390.c
+++ b/drivers/net/ethernet/8390/mcf8390.c
@@ -38,7 +38,6 @@ static const char version[] =
38 38
39#define NESM_START_PG 0x40 /* First page of TX buffer */ 39#define NESM_START_PG 0x40 /* First page of TX buffer */
40#define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */ 40#define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */
41static u32 mcf8390_msg_enable;
42 41
43#ifdef NE2000_ODDOFFSET 42#ifdef NE2000_ODDOFFSET
44/* 43/*
@@ -407,7 +406,6 @@ static int mcf8390_init(struct net_device *dev)
407static int mcf8390_probe(struct platform_device *pdev) 406static int mcf8390_probe(struct platform_device *pdev)
408{ 407{
409 struct net_device *dev; 408 struct net_device *dev;
410 struct ei_device *ei_local;
411 struct resource *mem, *irq; 409 struct resource *mem, *irq;
412 resource_size_t msize; 410 resource_size_t msize;
413 int ret; 411 int ret;
@@ -435,8 +433,6 @@ static int mcf8390_probe(struct platform_device *pdev)
435 433
436 SET_NETDEV_DEV(dev, &pdev->dev); 434 SET_NETDEV_DEV(dev, &pdev->dev);
437 platform_set_drvdata(pdev, dev); 435 platform_set_drvdata(pdev, dev);
438 ei_local = netdev_priv(dev);
439 ei_local->msg_enable = mcf8390_msg_enable;
440 436
441 dev->irq = irq->start; 437 dev->irq = irq->start;
442 dev->base_addr = mem->start; 438 dev->base_addr = mem->start;
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index 66f47987e2a2..4cdff6e6af89 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -485,7 +485,7 @@ static int __init ne_probe1(struct net_device *dev, unsigned long ioaddr)
485 mdelay(10); /* wait 10ms for interrupt to propagate */ 485 mdelay(10); /* wait 10ms for interrupt to propagate */
486 outb_p(0x00, ioaddr + EN0_IMR); /* Mask it again. */ 486 outb_p(0x00, ioaddr + EN0_IMR); /* Mask it again. */
487 dev->irq = probe_irq_off(cookie); 487 dev->irq = probe_irq_off(cookie);
488 if (netif_msg_probe(ei_local)) 488 if (ne_msg_enable & NETIF_MSG_PROBE)
489 pr_cont(" autoirq is %d", dev->irq); 489 pr_cont(" autoirq is %d", dev->irq);
490 } else if (dev->irq == 2) 490 } else if (dev->irq == 2)
491 /* Fixup for users that don't know that IRQ 2 is really IRQ 9, 491 /* Fixup for users that don't know that IRQ 2 is really IRQ 9,
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index bcad4a7fac9f..61e43802b9a5 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -66,7 +66,6 @@
66#define PCNET_RDC_TIMEOUT (2*HZ/100) /* Max wait in jiffies for Tx RDC */ 66#define PCNET_RDC_TIMEOUT (2*HZ/100) /* Max wait in jiffies for Tx RDC */
67 67
68static const char *if_names[] = { "auto", "10baseT", "10base2"}; 68static const char *if_names[] = { "auto", "10baseT", "10base2"};
69static u32 pcnet_msg_enable;
70 69
71/*====================================================================*/ 70/*====================================================================*/
72 71
@@ -556,7 +555,6 @@ static int pcnet_config(struct pcmcia_device *link)
556 int start_pg, stop_pg, cm_offset; 555 int start_pg, stop_pg, cm_offset;
557 int has_shmem = 0; 556 int has_shmem = 0;
558 struct hw_info *local_hw_info; 557 struct hw_info *local_hw_info;
559 struct ei_device *ei_local;
560 558
561 dev_dbg(&link->dev, "pcnet_config\n"); 559 dev_dbg(&link->dev, "pcnet_config\n");
562 560
@@ -606,8 +604,6 @@ static int pcnet_config(struct pcmcia_device *link)
606 mii_phy_probe(dev); 604 mii_phy_probe(dev);
607 605
608 SET_NETDEV_DEV(dev, &link->dev); 606 SET_NETDEV_DEV(dev, &link->dev);
609 ei_local = netdev_priv(dev);
610 ei_local->msg_enable = pcnet_msg_enable;
611 607
612 if (register_netdev(dev) != 0) { 608 if (register_netdev(dev) != 0) {
613 pr_notice("register_netdev() failed\n"); 609 pr_notice("register_netdev() failed\n");
diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c
index 6efa2722f850..fb17c2c7e1dd 100644
--- a/drivers/net/ethernet/8390/wd.c
+++ b/drivers/net/ethernet/8390/wd.c
@@ -299,7 +299,7 @@ static int __init wd_probe1(struct net_device *dev, int ioaddr)
299 299
300 outb_p(0x00, nic_addr+EN0_IMR); /* Mask all intrs. again. */ 300 outb_p(0x00, nic_addr+EN0_IMR); /* Mask all intrs. again. */
301 301
302 if (netif_msg_drv(ei_local)) 302 if (wd_msg_enable & NETIF_MSG_PROBE)
303 pr_cont(" autoirq is %d", dev->irq); 303 pr_cont(" autoirq is %d", dev->irq);
304 if (dev->irq < 2) 304 if (dev->irq < 2)
305 dev->irq = word16 ? 10 : 5; 305 dev->irq = word16 ? 10 : 5;
diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c
index 6d93956b293b..35a500a21521 100644
--- a/drivers/net/ethernet/8390/zorro8390.c
+++ b/drivers/net/ethernet/8390/zorro8390.c
@@ -44,8 +44,6 @@
44static const char version[] = 44static const char version[] =
45 "8390.c:v1.10cvs 9/23/94 Donald Becker (becker@cesdis.gsfc.nasa.gov)\n"; 45 "8390.c:v1.10cvs 9/23/94 Donald Becker (becker@cesdis.gsfc.nasa.gov)\n";
46 46
47static u32 zorro8390_msg_enable;
48
49#include "lib8390.c" 47#include "lib8390.c"
50 48
51#define DRV_NAME "zorro8390" 49#define DRV_NAME "zorro8390"
@@ -296,7 +294,6 @@ static int zorro8390_init(struct net_device *dev, unsigned long board,
296 int err; 294 int err;
297 unsigned char SA_prom[32]; 295 unsigned char SA_prom[32];
298 int start_page, stop_page; 296 int start_page, stop_page;
299 struct ei_device *ei_local = netdev_priv(dev);
300 static u32 zorro8390_offsets[16] = { 297 static u32 zorro8390_offsets[16] = {
301 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 298 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
302 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 299 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
@@ -388,8 +385,6 @@ static int zorro8390_init(struct net_device *dev, unsigned long board,
388 dev->netdev_ops = &zorro8390_netdev_ops; 385 dev->netdev_ops = &zorro8390_netdev_ops;
389 __NS8390_init(dev, 0); 386 __NS8390_init(dev, 0);
390 387
391 ei_local->msg_enable = zorro8390_msg_enable;
392
393 err = register_netdev(dev); 388 err = register_netdev(dev);
394 if (err) { 389 if (err) {
395 free_irq(IRQ_AMIGA_PORTS, dev); 390 free_irq(IRQ_AMIGA_PORTS, dev);
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index a77ee2f8fb8d..c1841db1b500 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -820,7 +820,7 @@ static int tg3_ape_event_lock(struct tg3 *tp, u32 timeout_us)
820 820
821 tg3_ape_unlock(tp, TG3_APE_LOCK_MEM); 821 tg3_ape_unlock(tp, TG3_APE_LOCK_MEM);
822 822
823 udelay(10); 823 usleep_range(10, 20);
824 timeout_us -= (timeout_us > 10) ? 10 : timeout_us; 824 timeout_us -= (timeout_us > 10) ? 10 : timeout_us;
825 } 825 }
826 826
@@ -922,8 +922,8 @@ static int tg3_ape_send_event(struct tg3 *tp, u32 event)
922 if (!(apedata & APE_FW_STATUS_READY)) 922 if (!(apedata & APE_FW_STATUS_READY))
923 return -EAGAIN; 923 return -EAGAIN;
924 924
 925	/* Wait for up to 1 millisecond for APE to service previous event. */	925	/* Wait for up to 20 milliseconds for APE to service previous event. */
926 err = tg3_ape_event_lock(tp, 1000); 926 err = tg3_ape_event_lock(tp, 20000);
927 if (err) 927 if (err)
928 return err; 928 return err;
929 929
@@ -946,6 +946,7 @@ static void tg3_ape_driver_state_change(struct tg3 *tp, int kind)
946 946
947 switch (kind) { 947 switch (kind) {
948 case RESET_KIND_INIT: 948 case RESET_KIND_INIT:
949 tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_COUNT, tp->ape_hb++);
949 tg3_ape_write32(tp, TG3_APE_HOST_SEG_SIG, 950 tg3_ape_write32(tp, TG3_APE_HOST_SEG_SIG,
950 APE_HOST_SEG_SIG_MAGIC); 951 APE_HOST_SEG_SIG_MAGIC);
951 tg3_ape_write32(tp, TG3_APE_HOST_SEG_LEN, 952 tg3_ape_write32(tp, TG3_APE_HOST_SEG_LEN,
@@ -962,13 +963,6 @@ static void tg3_ape_driver_state_change(struct tg3 *tp, int kind)
962 event = APE_EVENT_STATUS_STATE_START; 963 event = APE_EVENT_STATUS_STATE_START;
963 break; 964 break;
964 case RESET_KIND_SHUTDOWN: 965 case RESET_KIND_SHUTDOWN:
965 /* With the interface we are currently using,
966 * APE does not track driver state. Wiping
967 * out the HOST SEGMENT SIGNATURE forces
968 * the APE to assume OS absent status.
969 */
970 tg3_ape_write32(tp, TG3_APE_HOST_SEG_SIG, 0x0);
971
972 if (device_may_wakeup(&tp->pdev->dev) && 966 if (device_may_wakeup(&tp->pdev->dev) &&
973 tg3_flag(tp, WOL_ENABLE)) { 967 tg3_flag(tp, WOL_ENABLE)) {
974 tg3_ape_write32(tp, TG3_APE_HOST_WOL_SPEED, 968 tg3_ape_write32(tp, TG3_APE_HOST_WOL_SPEED,
@@ -990,6 +984,18 @@ static void tg3_ape_driver_state_change(struct tg3 *tp, int kind)
990 tg3_ape_send_event(tp, event); 984 tg3_ape_send_event(tp, event);
991} 985}
992 986
987static void tg3_send_ape_heartbeat(struct tg3 *tp,
988 unsigned long interval)
989{
 990	/* Check if hb interval has elapsed */
991 if (!tg3_flag(tp, ENABLE_APE) ||
992 time_before(jiffies, tp->ape_hb_jiffies + interval))
993 return;
994
995 tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_COUNT, tp->ape_hb++);
996 tp->ape_hb_jiffies = jiffies;
997}
998
993static void tg3_disable_ints(struct tg3 *tp) 999static void tg3_disable_ints(struct tg3 *tp)
994{ 1000{
995 int i; 1001 int i;
@@ -7262,6 +7268,7 @@ static int tg3_poll_msix(struct napi_struct *napi, int budget)
7262 } 7268 }
7263 } 7269 }
7264 7270
7271 tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL << 1);
7265 return work_done; 7272 return work_done;
7266 7273
7267tx_recovery: 7274tx_recovery:
@@ -7344,6 +7351,7 @@ static int tg3_poll(struct napi_struct *napi, int budget)
7344 } 7351 }
7345 } 7352 }
7346 7353
7354 tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL << 1);
7347 return work_done; 7355 return work_done;
7348 7356
7349tx_recovery: 7357tx_recovery:
@@ -10732,7 +10740,7 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy)
10732 if (tg3_flag(tp, ENABLE_APE)) 10740 if (tg3_flag(tp, ENABLE_APE))
10733 /* Write our heartbeat update interval to APE. */ 10741 /* Write our heartbeat update interval to APE. */
10734 tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_INT_MS, 10742 tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_INT_MS,
10735 APE_HOST_HEARTBEAT_INT_DISABLE); 10743 APE_HOST_HEARTBEAT_INT_5SEC);
10736 10744
10737 tg3_write_sig_post_reset(tp, RESET_KIND_INIT); 10745 tg3_write_sig_post_reset(tp, RESET_KIND_INIT);
10738 10746
@@ -11077,6 +11085,9 @@ static void tg3_timer(struct timer_list *t)
11077 tp->asf_counter = tp->asf_multiplier; 11085 tp->asf_counter = tp->asf_multiplier;
11078 } 11086 }
11079 11087
 11088	/* Update the APE heartbeat every 5 seconds. */
11089 tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL);
11090
11080 spin_unlock(&tp->lock); 11091 spin_unlock(&tp->lock);
11081 11092
11082restart_timer: 11093restart_timer:
@@ -16653,6 +16664,8 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent)
16653 pci_state_reg); 16664 pci_state_reg);
16654 16665
16655 tg3_ape_lock_init(tp); 16666 tg3_ape_lock_init(tp);
16667 tp->ape_hb_interval =
16668 msecs_to_jiffies(APE_HOST_HEARTBEAT_INT_5SEC);
16656 } 16669 }
16657 16670
16658 /* Set up tp->grc_local_ctrl before calling 16671 /* Set up tp->grc_local_ctrl before calling
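
tg3_send_ape_heartbeat() is deliberately cheap to call from hot paths:
it returns immediately via time_before() until ape_hb_interval jiffies
have elapsed, which is why the NAPI poll handlers can invoke it on
every pass (at twice the interval, as a safety net) while tg3_timer()
covers the normal 5-second cadence. The generic form of that
rate-limit idiom, for illustration only:

	#include <linux/jiffies.h>
	#include <linux/types.h>

	struct rate_limited {
		unsigned long last;	/* jiffies at last emission */
		unsigned long interval;	/* minimum gap, in jiffies */
	};

	static bool should_emit(struct rate_limited *rl)
	{
		if (time_before(jiffies, rl->last + rl->interval))
			return false;	/* too soon; skip this call */
		rl->last = jiffies;
		return true;
	}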
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index 47f51cc0566d..1d61aa3efda1 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -2508,6 +2508,7 @@
2508#define TG3_APE_LOCK_PHY3 5 2508#define TG3_APE_LOCK_PHY3 5
2509#define TG3_APE_LOCK_GPIO 7 2509#define TG3_APE_LOCK_GPIO 7
2510 2510
2511#define TG3_APE_HB_INTERVAL (tp->ape_hb_interval)
2511#define TG3_EEPROM_SB_F1R2_MBA_OFF 0x10 2512#define TG3_EEPROM_SB_F1R2_MBA_OFF 0x10
2512 2513
2513 2514
@@ -3423,6 +3424,10 @@ struct tg3 {
3423 struct device *hwmon_dev; 3424 struct device *hwmon_dev;
3424 bool link_up; 3425 bool link_up;
3425 bool pcierr_recovery; 3426 bool pcierr_recovery;
3427
3428 u32 ape_hb;
3429 unsigned long ape_hb_interval;
3430 unsigned long ape_hb_jiffies;
3426}; 3431};
3427 3432
3428/* Accessor macros for chip and asic attributes 3433/* Accessor macros for chip and asic attributes
diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.c b/drivers/net/ethernet/cavium/common/cavium_ptp.c
index c87c9c684a33..d59497a7bdce 100644
--- a/drivers/net/ethernet/cavium/common/cavium_ptp.c
+++ b/drivers/net/ethernet/cavium/common/cavium_ptp.c
@@ -75,6 +75,8 @@ EXPORT_SYMBOL(cavium_ptp_get);
75 75
76void cavium_ptp_put(struct cavium_ptp *ptp) 76void cavium_ptp_put(struct cavium_ptp *ptp)
77{ 77{
78 if (!ptp)
79 return;
78 pci_dev_put(ptp->pdev); 80 pci_dev_put(ptp->pdev);
79} 81}
80EXPORT_SYMBOL(cavium_ptp_put); 82EXPORT_SYMBOL(cavium_ptp_put);
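
The added guard makes cavium_ptp_put() safe to call with a NULL handle,
mirroring the kfree()-style convention: callers holding an optional
reference can release it unconditionally on error paths instead of
branching first. In sketch form (the widget type is illustrative):

	#include <linux/device.h>

	struct widget {
		struct device dev;
	};

	static void widget_put(struct widget *w)
	{
		if (!w)
			return;		/* nothing to release */
		put_device(&w->dev);
	}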
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index b68cde9f17d2..7d9c5ffbd041 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -67,11 +67,6 @@ module_param(cpi_alg, int, S_IRUGO);
67MODULE_PARM_DESC(cpi_alg, 67MODULE_PARM_DESC(cpi_alg,
68 "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)"); 68 "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
69 69
70struct nicvf_xdp_tx {
71 u64 dma_addr;
72 u8 qidx;
73};
74
75static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx) 70static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
76{ 71{
77 if (nic->sqs_mode) 72 if (nic->sqs_mode)
@@ -507,29 +502,14 @@ static int nicvf_init_resources(struct nicvf *nic)
507 return 0; 502 return 0;
508} 503}
509 504
510static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
511{
512 /* Check if it's a recycled page, if not unmap the DMA mapping.
513 * Recycled page holds an extra reference.
514 */
515 if (page_ref_count(page) == 1) {
516 dma_addr &= PAGE_MASK;
517 dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
518 RCV_FRAG_LEN + XDP_HEADROOM,
519 DMA_FROM_DEVICE,
520 DMA_ATTR_SKIP_CPU_SYNC);
521 }
522}
523
524static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, 505static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
525 struct cqe_rx_t *cqe_rx, struct snd_queue *sq, 506 struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
526 struct rcv_queue *rq, struct sk_buff **skb) 507 struct rcv_queue *rq, struct sk_buff **skb)
527{ 508{
528 struct xdp_buff xdp; 509 struct xdp_buff xdp;
529 struct page *page; 510 struct page *page;
530 struct nicvf_xdp_tx *xdp_tx = NULL;
531 u32 action; 511 u32 action;
532 u16 len, err, offset = 0; 512 u16 len, offset = 0;
533 u64 dma_addr, cpu_addr; 513 u64 dma_addr, cpu_addr;
534 void *orig_data; 514 void *orig_data;
535 515
@@ -543,7 +523,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
543 cpu_addr = (u64)phys_to_virt(cpu_addr); 523 cpu_addr = (u64)phys_to_virt(cpu_addr);
544 page = virt_to_page((void *)cpu_addr); 524 page = virt_to_page((void *)cpu_addr);
545 525
546 xdp.data_hard_start = page_address(page) + RCV_BUF_HEADROOM; 526 xdp.data_hard_start = page_address(page);
547 xdp.data = (void *)cpu_addr; 527 xdp.data = (void *)cpu_addr;
548 xdp_set_data_meta_invalid(&xdp); 528 xdp_set_data_meta_invalid(&xdp);
549 xdp.data_end = xdp.data + len; 529 xdp.data_end = xdp.data + len;
@@ -563,7 +543,18 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
563 543
564 switch (action) { 544 switch (action) {
565 case XDP_PASS: 545 case XDP_PASS:
566 nicvf_unmap_page(nic, page, dma_addr); 546 /* Check if it's a recycled page, if not
547 * unmap the DMA mapping.
548 *
549 * Recycled page holds an extra reference.
550 */
551 if (page_ref_count(page) == 1) {
552 dma_addr &= PAGE_MASK;
553 dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
554 RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
555 DMA_FROM_DEVICE,
556 DMA_ATTR_SKIP_CPU_SYNC);
557 }
567 558
568 /* Build SKB and pass on packet to network stack */ 559 /* Build SKB and pass on packet to network stack */
569 *skb = build_skb(xdp.data, 560 *skb = build_skb(xdp.data,
@@ -576,20 +567,6 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
576 case XDP_TX: 567 case XDP_TX:
577 nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len); 568 nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
578 return true; 569 return true;
579 case XDP_REDIRECT:
580 /* Save DMA address for use while transmitting */
581 xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
582 xdp_tx->dma_addr = dma_addr;
583 xdp_tx->qidx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);
584
585 err = xdp_do_redirect(nic->pnicvf->netdev, &xdp, prog);
586 if (!err)
587 return true;
588
589 /* Free the page on error */
590 nicvf_unmap_page(nic, page, dma_addr);
591 put_page(page);
592 break;
593 default: 570 default:
594 bpf_warn_invalid_xdp_action(action); 571 bpf_warn_invalid_xdp_action(action);
595 /* fall through */ 572 /* fall through */
@@ -597,7 +574,18 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
597 trace_xdp_exception(nic->netdev, prog, action); 574 trace_xdp_exception(nic->netdev, prog, action);
598 /* fall through */ 575 /* fall through */
599 case XDP_DROP: 576 case XDP_DROP:
600 nicvf_unmap_page(nic, page, dma_addr); 577 /* Check if it's a recycled page; if not,
578 * unmap the DMA mapping.
579 *
580 * Recycled page holds an extra reference.
581 */
582 if (page_ref_count(page) == 1) {
583 dma_addr &= PAGE_MASK;
584 dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
585 RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
586 DMA_FROM_DEVICE,
587 DMA_ATTR_SKIP_CPU_SYNC);
588 }
601 put_page(page); 589 put_page(page);
602 return true; 590 return true;
603 } 591 }
@@ -1864,50 +1852,6 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
1864 } 1852 }
1865} 1853}
1866 1854
1867static int nicvf_xdp_xmit(struct net_device *netdev, struct xdp_buff *xdp)
1868{
1869 struct nicvf *nic = netdev_priv(netdev);
1870 struct nicvf *snic = nic;
1871 struct nicvf_xdp_tx *xdp_tx;
1872 struct snd_queue *sq;
1873 struct page *page;
1874 int err, qidx;
1875
1876 if (!netif_running(netdev) || !nic->xdp_prog)
1877 return -EINVAL;
1878
1879 page = virt_to_page(xdp->data);
1880 xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
1881 qidx = xdp_tx->qidx;
1882
1883 if (xdp_tx->qidx >= nic->xdp_tx_queues)
1884 return -EINVAL;
1885
1886 /* Get secondary Qset's info */
1887 if (xdp_tx->qidx >= MAX_SND_QUEUES_PER_QS) {
1888 qidx = xdp_tx->qidx / MAX_SND_QUEUES_PER_QS;
1889 snic = (struct nicvf *)nic->snicvf[qidx - 1];
1890 if (!snic)
1891 return -EINVAL;
1892 qidx = xdp_tx->qidx % MAX_SND_QUEUES_PER_QS;
1893 }
1894
1895 sq = &snic->qs->sq[qidx];
1896 err = nicvf_xdp_sq_append_pkt(snic, sq, (u64)xdp->data,
1897 xdp_tx->dma_addr,
1898 xdp->data_end - xdp->data);
1899 if (err)
1900 return -ENOMEM;
1901
1902 nicvf_xdp_sq_doorbell(snic, sq, qidx);
1903 return 0;
1904}
1905
1906static void nicvf_xdp_flush(struct net_device *dev)
1907{
1908 return;
1909}
1910
1911static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) 1855static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr)
1912{ 1856{
1913 struct hwtstamp_config config; 1857 struct hwtstamp_config config;
@@ -1986,8 +1930,6 @@ static const struct net_device_ops nicvf_netdev_ops = {
1986 .ndo_fix_features = nicvf_fix_features, 1930 .ndo_fix_features = nicvf_fix_features,
1987 .ndo_set_features = nicvf_set_features, 1931 .ndo_set_features = nicvf_set_features,
1988 .ndo_bpf = nicvf_xdp, 1932 .ndo_bpf = nicvf_xdp,
1989 .ndo_xdp_xmit = nicvf_xdp_xmit,
1990 .ndo_xdp_flush = nicvf_xdp_flush,
1991 .ndo_do_ioctl = nicvf_ioctl, 1933 .ndo_do_ioctl = nicvf_ioctl,
1992}; 1934};
1993 1935
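
[Sketch] The nicvf hunks above drop the driver's custom XDP_REDIRECT path and open-code the recycle-aware unmap at the XDP_PASS and XDP_DROP sites. A minimal userspace model of that rule follows; every type and helper here is an illustrative stand-in, not a driver API.

#include <stdint.h>
#include <stdio.h>

#define PAGE_MASK (~((uint64_t)4096 - 1))

struct fake_page { int refcount; };

static void dma_unmap_stub(uint64_t dma_addr)
{
	printf("unmap at 0x%llx\n", (unsigned long long)dma_addr);
}

/* Only the last reference holder unmaps: a recycled page still sits in
 * the receive-buffer page cache with an extra reference, so its DMA
 * mapping must stay live for reuse. */
static void maybe_unmap(struct fake_page *page, uint64_t dma_addr)
{
	if (page->refcount == 1)
		dma_unmap_stub(dma_addr & PAGE_MASK); /* map starts at page */
}

int main(void)
{
	struct fake_page recycled = { .refcount = 2 };
	struct fake_page last_ref = { .refcount = 1 };

	maybe_unmap(&recycled, 0x10080); /* skipped: cache holds a ref */
	maybe_unmap(&last_ref, 0x20040); /* unmapped, aligned to 0x20000 */
	return 0;
}
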
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 3eae9ff9b53a..d42704d07484 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -204,7 +204,7 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
204 204
205 /* Reserve space for header modifications by BPF program */ 205 /* Reserve space for header modifications by BPF program */
206 if (rbdr->is_xdp) 206 if (rbdr->is_xdp)
207 buf_len += XDP_HEADROOM; 207 buf_len += XDP_PACKET_HEADROOM;
208 208
209 /* Check if it's recycled */ 209 /* Check if it's recycled */
210 if (pgcache) 210 if (pgcache)
@@ -224,9 +224,8 @@ ret:
224 nic->rb_page = NULL; 224 nic->rb_page = NULL;
225 return -ENOMEM; 225 return -ENOMEM;
226 } 226 }
227
228 if (pgcache) 227 if (pgcache)
229 pgcache->dma_addr = *rbuf + XDP_HEADROOM; 228 pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM;
230 nic->rb_page_offset += buf_len; 229 nic->rb_page_offset += buf_len;
231 } 230 }
232 231
@@ -1244,7 +1243,7 @@ int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
1244 int qentry; 1243 int qentry;
1245 1244
1246 if (subdesc_cnt > sq->xdp_free_cnt) 1245 if (subdesc_cnt > sq->xdp_free_cnt)
1247 return -1; 1246 return 0;
1248 1247
1249 qentry = nicvf_get_sq_desc(sq, subdesc_cnt); 1248 qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
1250 1249
@@ -1255,7 +1254,7 @@ int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
1255 1254
1256 sq->xdp_desc_cnt += subdesc_cnt; 1255 sq->xdp_desc_cnt += subdesc_cnt;
1257 1256
1258 return 0; 1257 return 1;
1259} 1258}
1260 1259
1261/* Calculate no of SQ subdescriptors needed to transmit all 1260/* Calculate no of SQ subdescriptors needed to transmit all
@@ -1656,7 +1655,7 @@ static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr,
1656 if (page_ref_count(page) != 1) 1655 if (page_ref_count(page) != 1)
1657 return; 1656 return;
1658 1657
1659 len += XDP_HEADROOM; 1658 len += XDP_PACKET_HEADROOM;
1660 /* Receive buffers in XDP mode are mapped from page start */ 1659 /* Receive buffers in XDP mode are mapped from page start */
1661 dma_addr &= PAGE_MASK; 1660 dma_addr &= PAGE_MASK;
1662 } 1661 }
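
[Sketch] The nicvf_queues.c hunks above also flip nicvf_xdp_sq_append_pkt from a -1/0 errno-style return to a boolean-style 0/1 return. A small stand-alone sketch of that caller contract, with stand-in types rather than driver structures:

#include <stdio.h>

struct sq_stub { int free_cnt; };

/* Mirrors the new contract: 0 = nothing queued (queue full), 1 = queued. */
static int append_pkt(struct sq_stub *sq, int subdesc_cnt)
{
	if (subdesc_cnt > sq->free_cnt)
		return 0;
	sq->free_cnt -= subdesc_cnt;
	return 1;
}

int main(void)
{
	struct sq_stub sq = { .free_cnt = 2 };

	printf("%d\n", append_pkt(&sq, 2)); /* 1: fits */
	printf("%d\n", append_pkt(&sq, 1)); /* 0: no free descriptors */
	return 0;
}
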
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index ce1eed7a6d63..5e9a03cf1b4d 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -11,7 +11,6 @@
11 11
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <linux/iommu.h> 13#include <linux/iommu.h>
14#include <linux/bpf.h>
15#include <net/xdp.h> 14#include <net/xdp.h>
16#include "q_struct.h" 15#include "q_struct.h"
17 16
@@ -94,9 +93,6 @@
94#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \ 93#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
95 SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) 94 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
96 95
97#define RCV_BUF_HEADROOM 128 /* To store dma address for XDP redirect */
98#define XDP_HEADROOM (XDP_PACKET_HEADROOM + RCV_BUF_HEADROOM)
99
100#define MAX_CQES_FOR_TX ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \ 96#define MAX_CQES_FOR_TX ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \
101 MAX_CQE_PER_PKT_XMIT) 97 MAX_CQE_PER_PKT_XMIT)
102 98
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index 557fd8bfd54e..9da6f57901a9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -472,7 +472,7 @@ int cudbg_collect_cim_la(struct cudbg_init *pdbg_init,
472 472
473 if (is_t6(padap->params.chip)) { 473 if (is_t6(padap->params.chip)) {
474 size = padap->params.cim_la_size / 10 + 1; 474 size = padap->params.cim_la_size / 10 + 1;
475 size *= 11 * sizeof(u32); 475 size *= 10 * sizeof(u32);
476 } else { 476 } else {
477 size = padap->params.cim_la_size / 8; 477 size = padap->params.cim_la_size / 8;
478 size *= 8 * sizeof(u32); 478 size *= 8 * sizeof(u32);
@@ -878,6 +878,86 @@ static int cudbg_get_payload_range(struct adapter *padap, u8 mem_type,
878 &payload->start, &payload->end); 878 &payload->start, &payload->end);
879} 879}
880 880
881static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
882 int mtype, u32 addr, u32 len, void *hbuf)
883{
884 u32 win_pf, memoffset, mem_aperture, mem_base;
885 struct adapter *adap = pdbg_init->adap;
886 u32 pos, offset, resid;
887 u32 *res_buf;
888 u64 *buf;
889 int ret;
890
891 /* Argument sanity checks ...
892 */
893 if (addr & 0x3 || (uintptr_t)hbuf & 0x3)
894 return -EINVAL;
895
896 buf = (u64 *)hbuf;
897
898 /* Try to do 64-bit reads. Residual will be handled later. */
899 resid = len & 0x7;
900 len -= resid;
901
902 ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
903 &mem_aperture);
904 if (ret)
905 return ret;
906
907 addr = addr + memoffset;
908 win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
909
910 pos = addr & ~(mem_aperture - 1);
911 offset = addr - pos;
912
913 /* Set up initial PCI-E Memory Window to cover the start of our
914 * transfer.
915 */
916 t4_memory_update_win(adap, win, pos | win_pf);
917
918 /* Transfer data from the adapter */
919 while (len > 0) {
920 *buf++ = le64_to_cpu((__force __le64)
921 t4_read_reg64(adap, mem_base + offset));
922 offset += sizeof(u64);
923 len -= sizeof(u64);
924
925 /* If we've reached the end of our current window aperture,
926 * move the PCI-E Memory Window on to the next.
927 */
928 if (offset == mem_aperture) {
929 pos += mem_aperture;
930 offset = 0;
931 t4_memory_update_win(adap, win, pos | win_pf);
932 }
933 }
934
935 res_buf = (u32 *)buf;
936 /* Read residual in 32-bit multiples */
937 while (resid > sizeof(u32)) {
938 *res_buf++ = le32_to_cpu((__force __le32)
939 t4_read_reg(adap, mem_base + offset));
940 offset += sizeof(u32);
941 resid -= sizeof(u32);
942
943 /* If we've reached the end of our current window aperture,
944 * move the PCI-E Memory Window on to the next.
945 */
946 if (offset == mem_aperture) {
947 pos += mem_aperture;
948 offset = 0;
949 t4_memory_update_win(adap, win, pos | win_pf);
950 }
951 }
952
953 /* Transfer residual < 32-bits */
954 if (resid)
955 t4_memory_rw_residual(adap, resid, mem_base + offset,
956 (u8 *)res_buf, T4_MEMORY_READ);
957
958 return 0;
959}
960
881#define CUDBG_YIELD_ITERATION 256 961#define CUDBG_YIELD_ITERATION 256
882 962
883static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, 963static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
@@ -937,10 +1017,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
937 goto skip_read; 1017 goto skip_read;
938 1018
939 spin_lock(&padap->win0_lock); 1019 spin_lock(&padap->win0_lock);
940 rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type, 1020 rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type,
941 bytes_read, bytes, 1021 bytes_read, bytes, temp_buff.data);
942 (__be32 *)temp_buff.data,
943 1);
944 spin_unlock(&padap->win0_lock); 1022 spin_unlock(&padap->win0_lock);
945 if (rc) { 1023 if (rc) {
946 cudbg_err->sys_err = rc; 1024 cudbg_err->sys_err = rc;
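
[Sketch] For orientation, cudbg_memory_read above splits each transfer into 64-bit bulk reads, then 32-bit reads, then a sub-32-bit tail handed to t4_memory_rw_residual. A tiny sketch of just that length decomposition (pure arithmetic, no hardware access; values are made up):

#include <stdint.h>
#include <stdio.h>

static void split_len(uint32_t len)
{
	uint32_t resid = len & 0x7;        /* bytes left after u64 reads */
	uint32_t bulk64 = len - resid;     /* done as 64-bit register reads */
	uint32_t bulk32 = 0;

	while (resid > sizeof(uint32_t)) { /* same bound as the driver loop */
		bulk32 += sizeof(uint32_t);
		resid -= sizeof(uint32_t);
	}
	printf("len=%2u -> u64=%2u u32=%u tail=%u\n",
	       (unsigned)len, (unsigned)bulk64, (unsigned)bulk32,
	       (unsigned)resid);
}

int main(void)
{
	split_len(29); /* 24 + 4 + 1 */
	split_len(30); /* 24 + 4 + 2 */
	split_len(32); /* 32 + 0 + 0 */
	return 0;
}
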
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 9040e13ce4b7..d3fa53db61ee 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1488,6 +1488,11 @@ u32 t4_read_pcie_cfg4(struct adapter *adap, int reg);
1488u32 t4_get_util_window(struct adapter *adap); 1488u32 t4_get_util_window(struct adapter *adap);
1489void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window); 1489void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window);
1490 1490
1491int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
1492 u32 *mem_base, u32 *mem_aperture);
1493void t4_memory_update_win(struct adapter *adap, int win, u32 addr);
1494void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
1495 int dir);
1491#define T4_MEMORY_WRITE 0 1496#define T4_MEMORY_WRITE 0
1492#define T4_MEMORY_READ 1 1497#define T4_MEMORY_READ 1
1493int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len, 1498int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index 30485f9a598f..143686c60234 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -102,7 +102,7 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
102 case CUDBG_CIM_LA: 102 case CUDBG_CIM_LA:
103 if (is_t6(adap->params.chip)) { 103 if (is_t6(adap->params.chip)) {
104 len = adap->params.cim_la_size / 10 + 1; 104 len = adap->params.cim_la_size / 10 + 1;
105 len *= 11 * sizeof(u32); 105 len *= 10 * sizeof(u32);
106 } else { 106 } else {
107 len = adap->params.cim_la_size / 8; 107 len = adap->params.cim_la_size / 8;
108 len *= 8 * sizeof(u32); 108 len *= 8 * sizeof(u32);
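
[Sketch] Both CIM LA hunks change the T6 per-row multiplier from 11 to 10 u32s, which appears to align the allocated buffer size with what the hardware returns per row. A worked example of the math, using a made-up cim_la_size and assuming 4-byte u32s:

#include <stdio.h>

int main(void)
{
	unsigned int cim_la_size = 2048;          /* hypothetical value */
	unsigned int rows = cim_la_size / 10 + 1; /* T6 rows, as above */
	unsigned int old_bytes = rows * 11 * 4;   /* pre-fix: 11 u32s/row */
	unsigned int new_bytes = rows * 10 * 4;   /* post-fix: 10 u32s/row */

	printf("rows=%u old=%u new=%u\n", rows, old_bytes, new_bytes);
	/* rows=205 old=9020 new=8200 */
	return 0;
}
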
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 56bc626ef006..7b452e85de2a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4982,9 +4982,10 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
4982 4982
4983 pcie_fw = readl(adap->regs + PCIE_FW_A); 4983 pcie_fw = readl(adap->regs + PCIE_FW_A);
4984 /* Check if cxgb4 is the MASTER and fw is initialized */ 4984 /* Check if cxgb4 is the MASTER and fw is initialized */
4985 if (!(pcie_fw & PCIE_FW_INIT_F) || 4985 if (num_vfs &&
4986 (!(pcie_fw & PCIE_FW_INIT_F) ||
4986 !(pcie_fw & PCIE_FW_MASTER_VLD_F) || 4987 !(pcie_fw & PCIE_FW_MASTER_VLD_F) ||
4987 PCIE_FW_MASTER_G(pcie_fw) != CXGB4_UNIFIED_PF) { 4988 PCIE_FW_MASTER_G(pcie_fw) != CXGB4_UNIFIED_PF)) {
4988 dev_warn(&pdev->dev, 4989 dev_warn(&pdev->dev,
4989 "cxgb4 driver needs to be MASTER to support SRIOV\n"); 4990 "cxgb4 driver needs to be MASTER to support SRIOV\n");
4990 return -EOPNOTSUPP; 4991 return -EOPNOTSUPP;
@@ -5599,24 +5600,24 @@ static void remove_one(struct pci_dev *pdev)
5599#if IS_ENABLED(CONFIG_IPV6) 5600#if IS_ENABLED(CONFIG_IPV6)
5600 t4_cleanup_clip_tbl(adapter); 5601 t4_cleanup_clip_tbl(adapter);
5601#endif 5602#endif
5602 iounmap(adapter->regs);
5603 if (!is_t4(adapter->params.chip)) 5603 if (!is_t4(adapter->params.chip))
5604 iounmap(adapter->bar2); 5604 iounmap(adapter->bar2);
5605 pci_disable_pcie_error_reporting(pdev);
5606 if ((adapter->flags & DEV_ENABLED)) {
5607 pci_disable_device(pdev);
5608 adapter->flags &= ~DEV_ENABLED;
5609 }
5610 pci_release_regions(pdev);
5611 kfree(adapter->mbox_log);
5612 synchronize_rcu();
5613 kfree(adapter);
5614 } 5605 }
5615#ifdef CONFIG_PCI_IOV 5606#ifdef CONFIG_PCI_IOV
5616 else { 5607 else {
5617 cxgb4_iov_configure(adapter->pdev, 0); 5608 cxgb4_iov_configure(adapter->pdev, 0);
5618 } 5609 }
5619#endif 5610#endif
5611 iounmap(adapter->regs);
5612 pci_disable_pcie_error_reporting(pdev);
5613 if ((adapter->flags & DEV_ENABLED)) {
5614 pci_disable_device(pdev);
5615 adapter->flags &= ~DEV_ENABLED;
5616 }
5617 pci_release_regions(pdev);
5618 kfree(adapter->mbox_log);
5619 synchronize_rcu();
5620 kfree(adapter);
5620} 5621}
5621 5622
5622/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt 5623/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 047609ef0515..bd41f93f73ed 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -484,6 +484,117 @@ static int t4_edc_err_read(struct adapter *adap, int idx)
484} 484}
485 485
486/** 486/**
487 * t4_memory_rw_init - Get memory window relative offset, base, and size.
488 * @adap: the adapter
489 * @win: PCI-E Memory Window to use
490 * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC
491 * @mem_off: memory relative offset with respect to @mtype.
492 * @mem_base: configured memory base address.
493 * @mem_aperture: configured memory window aperture.
494 *
495 * Get the configured memory window's relative offset, base, and size.
496 */
497int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
498 u32 *mem_base, u32 *mem_aperture)
499{
500 u32 edc_size, mc_size, mem_reg;
501
502 /* Offset into the region of memory which is being accessed
503 * MEM_EDC0 = 0
504 * MEM_EDC1 = 1
505 * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller
506 * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5)
507 * MEM_HMA = 4
508 */
509 edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
510 if (mtype == MEM_HMA) {
511 *mem_off = 2 * (edc_size * 1024 * 1024);
512 } else if (mtype != MEM_MC1) {
513 *mem_off = (mtype * (edc_size * 1024 * 1024));
514 } else {
515 mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
516 MA_EXT_MEMORY0_BAR_A));
517 *mem_off = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
518 }
519
520 /* Each PCI-E Memory Window is programmed with a window size -- or
521 * "aperture" -- which controls the granularity of its mapping onto
522 * adapter memory. We need to grab that aperture in order to know
523 * how to use the specified window. The window is also programmed
524 * with the base address of the Memory Window in BAR0's address
525 * space. For T4 this is an absolute PCI-E Bus Address. For T5
526 * the address is relative to BAR0.
527 */
528 mem_reg = t4_read_reg(adap,
529 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
530 win));
531 /* a dead adapter will return 0xffffffff for PIO reads */
532 if (mem_reg == 0xffffffff)
533 return -ENXIO;
534
535 *mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
536 *mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
537 if (is_t4(adap->params.chip))
538 *mem_base -= adap->t4_bar0;
539
540 return 0;
541}
542
543/**
544 * t4_memory_update_win - Move memory window to specified address.
545 * @adap: the adapter
546 * @win: PCI-E Memory Window to use
547 * @addr: location to move.
548 *
549 * Move memory window to specified address.
550 */
551void t4_memory_update_win(struct adapter *adap, int win, u32 addr)
552{
553 t4_write_reg(adap,
554 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
555 addr);
556 /* Read it back to ensure that changes propagate before we
557 * attempt to use the new value.
558 */
559 t4_read_reg(adap,
560 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
561}
562
563/**
564 * t4_memory_rw_residual - Read/Write residual data.
565 * @adap: the adapter
566 * @off: relative offset within residual to start read/write.
567 * @addr: address within indicated memory type.
568 * @buf: host memory buffer
569 * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0)
570 *
571 * Read/Write residual data less than 32-bits.
572 */
573void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
574 int dir)
575{
576 union {
577 u32 word;
578 char byte[4];
579 } last;
580 unsigned char *bp;
581 int i;
582
583 if (dir == T4_MEMORY_READ) {
584 last.word = le32_to_cpu((__force __le32)
585 t4_read_reg(adap, addr));
586 for (bp = (unsigned char *)buf, i = off; i < 4; i++)
587 bp[i] = last.byte[i];
588 } else {
589 last.word = *buf;
590 for (i = off; i < 4; i++)
591 last.byte[i] = 0;
592 t4_write_reg(adap, addr,
593 (__force u32)cpu_to_le32(last.word));
594 }
595}
596
597/**
487 * t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window 598 * t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
488 * @adap: the adapter 599 * @adap: the adapter
489 * @win: PCI-E Memory Window to use 600 * @win: PCI-E Memory Window to use
@@ -504,8 +615,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
504 u32 len, void *hbuf, int dir) 615 u32 len, void *hbuf, int dir)
505{ 616{
506 u32 pos, offset, resid, memoffset; 617 u32 pos, offset, resid, memoffset;
507 u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base; 618 u32 win_pf, mem_aperture, mem_base;
508 u32 *buf; 619 u32 *buf;
620 int ret;
509 621
510 /* Argument sanity checks ... 622 /* Argument sanity checks ...
511 */ 623 */
@@ -521,59 +633,26 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
521 resid = len & 0x3; 633 resid = len & 0x3;
522 len -= resid; 634 len -= resid;
523 635
524 /* Offset into the region of memory which is being accessed 636 ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
525 * MEM_EDC0 = 0 637 &mem_aperture);
526 * MEM_EDC1 = 1 638 if (ret)
527 * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller 639 return ret;
528 * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5)
529 * MEM_HMA = 4
530 */
531 edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
532 if (mtype == MEM_HMA) {
533 memoffset = 2 * (edc_size * 1024 * 1024);
534 } else if (mtype != MEM_MC1) {
535 memoffset = (mtype * (edc_size * 1024 * 1024));
536 } else {
537 mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
538 MA_EXT_MEMORY0_BAR_A));
539 memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
540 }
541 640
542 /* Determine the PCIE_MEM_ACCESS_OFFSET */ 641 /* Determine the PCIE_MEM_ACCESS_OFFSET */
543 addr = addr + memoffset; 642 addr = addr + memoffset;
544 643
545 /* Each PCI-E Memory Window is programmed with a window size -- or
546 * "aperture" -- which controls the granularity of its mapping onto
547 * adapter memory. We need to grab that aperture in order to know
548 * how to use the specified window. The window is also programmed
549 * with the base address of the Memory Window in BAR0's address
550 * space. For T4 this is an absolute PCI-E Bus Address. For T5
551 * the address is relative to BAR0.
552 */
553 mem_reg = t4_read_reg(adap,
554 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
555 win));
556 mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
557 mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
558 if (is_t4(adap->params.chip))
559 mem_base -= adap->t4_bar0;
560 win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf); 644 win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
561 645
562 /* Calculate our initial PCI-E Memory Window Position and Offset into 646 /* Calculate our initial PCI-E Memory Window Position and Offset into
563 * that Window. 647 * that Window.
564 */ 648 */
565 pos = addr & ~(mem_aperture-1); 649 pos = addr & ~(mem_aperture - 1);
566 offset = addr - pos; 650 offset = addr - pos;
567 651
568 /* Set up initial PCI-E Memory Window to cover the start of our 652 /* Set up initial PCI-E Memory Window to cover the start of our
569 * transfer. (Read it back to ensure that changes propagate before we 653 * transfer.
570 * attempt to use the new value.)
571 */ 654 */
572 t4_write_reg(adap, 655 t4_memory_update_win(adap, win, pos | win_pf);
573 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
574 pos | win_pf);
575 t4_read_reg(adap,
576 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
577 656
578 /* Transfer data to/from the adapter as long as there's an integral 657 /* Transfer data to/from the adapter as long as there's an integral
579 * number of 32-bit transfers to complete. 658 * number of 32-bit transfers to complete.
@@ -628,12 +707,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
628 if (offset == mem_aperture) { 707 if (offset == mem_aperture) {
629 pos += mem_aperture; 708 pos += mem_aperture;
630 offset = 0; 709 offset = 0;
631 t4_write_reg(adap, 710 t4_memory_update_win(adap, win, pos | win_pf);
632 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
633 win), pos | win_pf);
634 t4_read_reg(adap,
635 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
636 win));
637 } 711 }
638 } 712 }
639 713
@@ -642,28 +716,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
642 * residual amount. The PCI-E Memory Window has already been moved 716 * residual amount. The PCI-E Memory Window has already been moved
643 * above (if necessary) to cover this final transfer. 717 * above (if necessary) to cover this final transfer.
644 */ 718 */
645 if (resid) { 719 if (resid)
646 union { 720 t4_memory_rw_residual(adap, resid, mem_base + offset,
647 u32 word; 721 (u8 *)buf, dir);
648 char byte[4];
649 } last;
650 unsigned char *bp;
651 int i;
652
653 if (dir == T4_MEMORY_READ) {
654 last.word = le32_to_cpu(
655 (__force __le32)t4_read_reg(adap,
656 mem_base + offset));
657 for (bp = (unsigned char *)buf, i = resid; i < 4; i++)
658 bp[i] = last.byte[i];
659 } else {
660 last.word = *buf;
661 for (i = resid; i < 4; i++)
662 last.byte[i] = 0;
663 t4_write_reg(adap, mem_base + offset,
664 (__force u32)cpu_to_le32(last.word));
665 }
666 }
667 722
668 return 0; 723 return 0;
669} 724}
@@ -2637,7 +2692,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
2637} 2692}
2638 2693
2639#define EEPROM_STAT_ADDR 0x7bfc 2694#define EEPROM_STAT_ADDR 0x7bfc
2640#define VPD_SIZE 0x800
2641#define VPD_BASE 0x400 2695#define VPD_BASE 0x400
2642#define VPD_BASE_OLD 0 2696#define VPD_BASE_OLD 0
2643#define VPD_LEN 1024 2697#define VPD_LEN 1024
@@ -2704,15 +2758,6 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p)
2704 if (!vpd) 2758 if (!vpd)
2705 return -ENOMEM; 2759 return -ENOMEM;
2706 2760
2707 /* We have two VPD data structures stored in the adapter VPD area.
2708 * By default, Linux calculates the size of the VPD area by traversing
2709 * the first VPD area at offset 0x0, so we need to tell the OS what
2710 * our real VPD size is.
2711 */
2712 ret = pci_set_vpd_size(adapter->pdev, VPD_SIZE);
2713 if (ret < 0)
2714 goto out;
2715
2716 /* Card information normally starts at VPD_BASE but early cards had 2761 /* Card information normally starts at VPD_BASE but early cards had
2717 * it at 0. 2762 * it at 0.
2718 */ 2763 */
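
[Sketch] The t4_hw.c refactor above factors the PCI-E memory-window bookkeeping into t4_memory_rw_init, t4_memory_update_win, and t4_memory_rw_residual. The heart of it is the aperture arithmetic; here is a self-contained sketch with made-up aperture and address values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mem_aperture = 1u << 16;          /* 64 KiB window (made up) */
	uint32_t addr = 0x123456;                  /* made-up target address */
	uint32_t pos = addr & ~(mem_aperture - 1); /* window position */
	uint32_t offset = addr - pos;              /* offset inside window */

	printf("pos=0x%x offset=0x%x\n", (unsigned)pos, (unsigned)offset);
	/* pos=0x120000 offset=0x3456 */

	/* When a transfer walks off the aperture edge, the window slides
	 * forward and the offset resets, as in the loops above. */
	offset = mem_aperture; /* pretend we just hit the edge */
	if (offset == mem_aperture) {
		pos += mem_aperture;
		offset = 0;
	}
	printf("next pos=0x%x\n", (unsigned)pos); /* next pos=0x130000 */
	return 0;
}
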
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 7caa8da48421..a998c36c5e61 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2766,7 +2766,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
2766 2766
2767 priv->channel = (u16)channel; 2767 priv->channel = (u16)channel;
2768 2768
2769 /* Start a thread that will walk the CPUs with affine portals 2769 /* Walk the CPUs with affine portals
2770 * and add this pool channel to each CPU's dequeue mask. 2770 * and add this pool channel to each CPU's dequeue mask.
2771 */ 2771 */
2772 dpaa_eth_add_channel(priv->channel); 2772 dpaa_eth_add_channel(priv->channel);
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index faea674094b9..85306d1b2acf 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -211,7 +211,7 @@ static int dpaa_set_pauseparam(struct net_device *net_dev,
211 if (epause->rx_pause) 211 if (epause->rx_pause)
212 newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause; 212 newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause;
213 if (epause->tx_pause) 213 if (epause->tx_pause)
214 newadv |= ADVERTISED_Asym_Pause; 214 newadv ^= ADVERTISED_Asym_Pause;
215 215
216 oldadv = phydev->advertising & 216 oldadv = phydev->advertising &
217 (ADVERTISED_Pause | ADVERTISED_Asym_Pause); 217 (ADVERTISED_Pause | ADVERTISED_Asym_Pause);
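
[Sketch] The one-character dpaa fix above (|= becomes ^=) encodes the standard rx/tx pause advertising table: rx sets Pause|Asym_Pause, and tx then toggles Asym_Pause. A sketch with illustrative bit values, not the kernel's constants:

#include <stdio.h>

#define ADV_PAUSE      0x1
#define ADV_ASYM_PAUSE 0x2

static unsigned int advert(int rx, int tx)
{
	unsigned int adv = 0;

	if (rx)
		adv = ADV_PAUSE | ADV_ASYM_PAUSE;
	if (tx)
		adv ^= ADV_ASYM_PAUSE; /* |= would wrongly keep it set */
	return adv;
}

int main(void)
{
	printf("rx+tx -> 0x%x\n", advert(1, 1)); /* 0x1: symmetric pause */
	printf("rx    -> 0x%x\n", advert(1, 0)); /* 0x3: pause + asym */
	printf("tx    -> 0x%x\n", advert(0, 1)); /* 0x2: asym only */
	printf("none  -> 0x%x\n", advert(0, 0)); /* 0x0 */
	return 0;
}
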
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 27447260215d..1703b881252f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -90,7 +90,7 @@ MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
90 90
91static int ibmvnic_version = IBMVNIC_INITIAL_VERSION; 91static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
92static int ibmvnic_remove(struct vio_dev *); 92static int ibmvnic_remove(struct vio_dev *);
93static void release_sub_crqs(struct ibmvnic_adapter *); 93static void release_sub_crqs(struct ibmvnic_adapter *, bool);
94static int ibmvnic_reset_crq(struct ibmvnic_adapter *); 94static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
95static int ibmvnic_send_crq_init(struct ibmvnic_adapter *); 95static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
96static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *); 96static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *);
@@ -361,14 +361,14 @@ static void release_stats_buffers(struct ibmvnic_adapter *adapter)
361static int init_stats_buffers(struct ibmvnic_adapter *adapter) 361static int init_stats_buffers(struct ibmvnic_adapter *adapter)
362{ 362{
363 adapter->tx_stats_buffers = 363 adapter->tx_stats_buffers =
364 kcalloc(adapter->req_tx_queues, 364 kcalloc(IBMVNIC_MAX_QUEUES,
365 sizeof(struct ibmvnic_tx_queue_stats), 365 sizeof(struct ibmvnic_tx_queue_stats),
366 GFP_KERNEL); 366 GFP_KERNEL);
367 if (!adapter->tx_stats_buffers) 367 if (!adapter->tx_stats_buffers)
368 return -ENOMEM; 368 return -ENOMEM;
369 369
370 adapter->rx_stats_buffers = 370 adapter->rx_stats_buffers =
371 kcalloc(adapter->req_rx_queues, 371 kcalloc(IBMVNIC_MAX_QUEUES,
372 sizeof(struct ibmvnic_rx_queue_stats), 372 sizeof(struct ibmvnic_rx_queue_stats),
373 GFP_KERNEL); 373 GFP_KERNEL);
374 if (!adapter->rx_stats_buffers) 374 if (!adapter->rx_stats_buffers)
@@ -461,7 +461,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
461 if (!adapter->rx_pool) 461 if (!adapter->rx_pool)
462 return; 462 return;
463 463
464 for (i = 0; i < adapter->num_active_rx_pools; i++) { 464 for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
465 rx_pool = &adapter->rx_pool[i]; 465 rx_pool = &adapter->rx_pool[i];
466 466
467 netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i); 467 netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i);
@@ -484,7 +484,6 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
484 484
485 kfree(adapter->rx_pool); 485 kfree(adapter->rx_pool);
486 adapter->rx_pool = NULL; 486 adapter->rx_pool = NULL;
487 adapter->num_active_rx_pools = 0;
488} 487}
489 488
490static int init_rx_pools(struct net_device *netdev) 489static int init_rx_pools(struct net_device *netdev)
@@ -509,8 +508,6 @@ static int init_rx_pools(struct net_device *netdev)
509 return -1; 508 return -1;
510 } 509 }
511 510
512 adapter->num_active_rx_pools = 0;
513
514 for (i = 0; i < rxadd_subcrqs; i++) { 511 for (i = 0; i < rxadd_subcrqs; i++) {
515 rx_pool = &adapter->rx_pool[i]; 512 rx_pool = &adapter->rx_pool[i];
516 513
@@ -554,8 +551,6 @@ static int init_rx_pools(struct net_device *netdev)
554 rx_pool->next_free = 0; 551 rx_pool->next_free = 0;
555 } 552 }
556 553
557 adapter->num_active_rx_pools = rxadd_subcrqs;
558
559 return 0; 554 return 0;
560} 555}
561 556
@@ -613,7 +608,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
613 if (!adapter->tx_pool) 608 if (!adapter->tx_pool)
614 return; 609 return;
615 610
616 for (i = 0; i < adapter->num_active_tx_pools; i++) { 611 for (i = 0; i < adapter->num_active_tx_scrqs; i++) {
617 netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i); 612 netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i);
618 tx_pool = &adapter->tx_pool[i]; 613 tx_pool = &adapter->tx_pool[i];
619 kfree(tx_pool->tx_buff); 614 kfree(tx_pool->tx_buff);
@@ -624,7 +619,6 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
624 619
625 kfree(adapter->tx_pool); 620 kfree(adapter->tx_pool);
626 adapter->tx_pool = NULL; 621 adapter->tx_pool = NULL;
627 adapter->num_active_tx_pools = 0;
628} 622}
629 623
630static int init_tx_pools(struct net_device *netdev) 624static int init_tx_pools(struct net_device *netdev)
@@ -641,8 +635,6 @@ static int init_tx_pools(struct net_device *netdev)
641 if (!adapter->tx_pool) 635 if (!adapter->tx_pool)
642 return -1; 636 return -1;
643 637
644 adapter->num_active_tx_pools = 0;
645
646 for (i = 0; i < tx_subcrqs; i++) { 638 for (i = 0; i < tx_subcrqs; i++) {
647 tx_pool = &adapter->tx_pool[i]; 639 tx_pool = &adapter->tx_pool[i];
648 640
@@ -690,8 +682,6 @@ static int init_tx_pools(struct net_device *netdev)
690 tx_pool->producer_index = 0; 682 tx_pool->producer_index = 0;
691 } 683 }
692 684
693 adapter->num_active_tx_pools = tx_subcrqs;
694
695 return 0; 685 return 0;
696} 686}
697 687
@@ -740,6 +730,43 @@ static void ibmvnic_napi_disable(struct ibmvnic_adapter *adapter)
740 adapter->napi_enabled = false; 730 adapter->napi_enabled = false;
741} 731}
742 732
733static int init_napi(struct ibmvnic_adapter *adapter)
734{
735 int i;
736
737 adapter->napi = kcalloc(adapter->req_rx_queues,
738 sizeof(struct napi_struct), GFP_KERNEL);
739 if (!adapter->napi)
740 return -ENOMEM;
741
742 for (i = 0; i < adapter->req_rx_queues; i++) {
743 netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i);
744 netif_napi_add(adapter->netdev, &adapter->napi[i],
745 ibmvnic_poll, NAPI_POLL_WEIGHT);
746 }
747
748 return 0;
749}
750
751static void release_napi(struct ibmvnic_adapter *adapter)
752{
753 int i;
754
755 if (!adapter->napi)
756 return;
757
758 for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
759 if (&adapter->napi[i]) {
760 netdev_dbg(adapter->netdev,
761 "Releasing napi[%d]\n", i);
762 netif_napi_del(&adapter->napi[i]);
763 }
764 }
765
766 kfree(adapter->napi);
767 adapter->napi = NULL;
768}
769
743static int ibmvnic_login(struct net_device *netdev) 770static int ibmvnic_login(struct net_device *netdev)
744{ 771{
745 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 772 struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -750,7 +777,7 @@ static int ibmvnic_login(struct net_device *netdev)
750 do { 777 do {
751 if (adapter->renegotiate) { 778 if (adapter->renegotiate) {
752 adapter->renegotiate = false; 779 adapter->renegotiate = false;
753 release_sub_crqs(adapter); 780 release_sub_crqs(adapter, 1);
754 781
755 reinit_completion(&adapter->init_done); 782 reinit_completion(&adapter->init_done);
756 send_cap_queries(adapter); 783 send_cap_queries(adapter);
@@ -791,10 +818,20 @@ static int ibmvnic_login(struct net_device *netdev)
791 return 0; 818 return 0;
792} 819}
793 820
794static void release_resources(struct ibmvnic_adapter *adapter) 821static void release_login_buffer(struct ibmvnic_adapter *adapter)
795{ 822{
796 int i; 823 kfree(adapter->login_buf);
824 adapter->login_buf = NULL;
825}
826
827static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
828{
829 kfree(adapter->login_rsp_buf);
830 adapter->login_rsp_buf = NULL;
831}
797 832
833static void release_resources(struct ibmvnic_adapter *adapter)
834{
798 release_vpd_data(adapter); 835 release_vpd_data(adapter);
799 836
800 release_tx_pools(adapter); 837 release_tx_pools(adapter);
@@ -803,16 +840,8 @@ static void release_resources(struct ibmvnic_adapter *adapter)
803 release_stats_token(adapter); 840 release_stats_token(adapter);
804 release_stats_buffers(adapter); 841 release_stats_buffers(adapter);
805 release_error_buffers(adapter); 842 release_error_buffers(adapter);
806 843 release_napi(adapter);
807 if (adapter->napi) { 844 release_login_rsp_buffer(adapter);
808 for (i = 0; i < adapter->req_rx_queues; i++) {
809 if (&adapter->napi[i]) {
810 netdev_dbg(adapter->netdev,
811 "Releasing napi[%d]\n", i);
812 netif_napi_del(&adapter->napi[i]);
813 }
814 }
815 }
816} 845}
817 846
818static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state) 847static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
@@ -931,7 +960,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
931static int init_resources(struct ibmvnic_adapter *adapter) 960static int init_resources(struct ibmvnic_adapter *adapter)
932{ 961{
933 struct net_device *netdev = adapter->netdev; 962 struct net_device *netdev = adapter->netdev;
934 int i, rc; 963 int rc;
935 964
936 rc = set_real_num_queues(netdev); 965 rc = set_real_num_queues(netdev);
937 if (rc) 966 if (rc)
@@ -957,16 +986,10 @@ static int init_resources(struct ibmvnic_adapter *adapter)
957 } 986 }
958 987
959 adapter->map_id = 1; 988 adapter->map_id = 1;
960 adapter->napi = kcalloc(adapter->req_rx_queues,
961 sizeof(struct napi_struct), GFP_KERNEL);
962 if (!adapter->napi)
963 return -ENOMEM;
964 989
965 for (i = 0; i < adapter->req_rx_queues; i++) { 990 rc = init_napi(adapter);
966 netdev_dbg(netdev, "Adding napi[%d]\n", i); 991 if (rc)
967 netif_napi_add(netdev, &adapter->napi[i], ibmvnic_poll, 992 return rc;
968 NAPI_POLL_WEIGHT);
969 }
970 993
971 send_map_query(adapter); 994 send_map_query(adapter);
972 995
@@ -975,6 +998,10 @@ static int init_resources(struct ibmvnic_adapter *adapter)
975 return rc; 998 return rc;
976 999
977 rc = init_tx_pools(netdev); 1000 rc = init_tx_pools(netdev);
1001
1002 adapter->num_active_tx_scrqs = adapter->req_tx_queues;
1003 adapter->num_active_rx_scrqs = adapter->req_rx_queues;
1004
978 return rc; 1005 return rc;
979} 1006}
980 1007
@@ -1057,6 +1084,35 @@ static int ibmvnic_open(struct net_device *netdev)
1057 return rc; 1084 return rc;
1058} 1085}
1059 1086
1087static void clean_rx_pools(struct ibmvnic_adapter *adapter)
1088{
1089 struct ibmvnic_rx_pool *rx_pool;
1090 u64 rx_entries;
1091 int rx_scrqs;
1092 int i, j;
1093
1094 if (!adapter->rx_pool)
1095 return;
1096
1097 rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
1098 rx_entries = adapter->req_rx_add_entries_per_subcrq;
1099
1100 /* Free any remaining skbs in the rx buffer pools */
1101 for (i = 0; i < rx_scrqs; i++) {
1102 rx_pool = &adapter->rx_pool[i];
1103 if (!rx_pool)
1104 continue;
1105
1106 netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i);
1107 for (j = 0; j < rx_entries; j++) {
1108 if (rx_pool->rx_buff[j].skb) {
1109 dev_kfree_skb_any(rx_pool->rx_buff[j].skb);
1110 rx_pool->rx_buff[j].skb = NULL;
1111 }
1112 }
1113 }
1114}
1115
1060static void clean_tx_pools(struct ibmvnic_adapter *adapter) 1116static void clean_tx_pools(struct ibmvnic_adapter *adapter)
1061{ 1117{
1062 struct ibmvnic_tx_pool *tx_pool; 1118 struct ibmvnic_tx_pool *tx_pool;
@@ -1134,7 +1190,7 @@ static int __ibmvnic_close(struct net_device *netdev)
1134 } 1190 }
1135 } 1191 }
1136 } 1192 }
1137 1193 clean_rx_pools(adapter);
1138 clean_tx_pools(adapter); 1194 clean_tx_pools(adapter);
1139 adapter->state = VNIC_CLOSED; 1195 adapter->state = VNIC_CLOSED;
1140 return rc; 1196 return rc;
@@ -1422,6 +1478,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
1422 if ((*hdrs >> 7) & 1) { 1478 if ((*hdrs >> 7) & 1) {
1423 build_hdr_descs_arr(tx_buff, &num_entries, *hdrs); 1479 build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
1424 tx_crq.v1.n_crq_elem = num_entries; 1480 tx_crq.v1.n_crq_elem = num_entries;
1481 tx_buff->num_entries = num_entries;
1425 tx_buff->indir_arr[0] = tx_crq; 1482 tx_buff->indir_arr[0] = tx_crq;
1426 tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr, 1483 tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
1427 sizeof(tx_buff->indir_arr), 1484 sizeof(tx_buff->indir_arr),
@@ -1470,7 +1527,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
1470 goto out; 1527 goto out;
1471 } 1528 }
1472 1529
1473 if (atomic_inc_return(&tx_scrq->used) 1530 if (atomic_add_return(num_entries, &tx_scrq->used)
1474 >= adapter->req_tx_entries_per_subcrq) { 1531 >= adapter->req_tx_entries_per_subcrq) {
1475 netdev_info(netdev, "Stopping queue %d\n", queue_num); 1532 netdev_info(netdev, "Stopping queue %d\n", queue_num);
1476 netif_stop_subqueue(netdev, queue_num); 1533 netif_stop_subqueue(netdev, queue_num);
@@ -1608,7 +1665,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
1608 if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM || 1665 if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
1609 adapter->wait_for_reset) { 1666 adapter->wait_for_reset) {
1610 release_resources(adapter); 1667 release_resources(adapter);
1611 release_sub_crqs(adapter); 1668 release_sub_crqs(adapter, 1);
1612 release_crq_queue(adapter); 1669 release_crq_queue(adapter);
1613 } 1670 }
1614 1671
@@ -1646,6 +1703,12 @@ static int do_reset(struct ibmvnic_adapter *adapter,
1646 release_tx_pools(adapter); 1703 release_tx_pools(adapter);
1647 init_rx_pools(netdev); 1704 init_rx_pools(netdev);
1648 init_tx_pools(netdev); 1705 init_tx_pools(netdev);
1706
1707 release_napi(adapter);
1708 init_napi(adapter);
1709
1710 adapter->num_active_tx_scrqs = adapter->req_tx_queues;
1711 adapter->num_active_rx_scrqs = adapter->req_rx_queues;
1649 } else { 1712 } else {
1650 rc = reset_tx_pools(adapter); 1713 rc = reset_tx_pools(adapter);
1651 if (rc) 1714 if (rc)
@@ -1670,8 +1733,6 @@ static int do_reset(struct ibmvnic_adapter *adapter,
1670 return 0; 1733 return 0;
1671 } 1734 }
1672 1735
1673 netif_carrier_on(netdev);
1674
1675 /* kick napi */ 1736 /* kick napi */
1676 for (i = 0; i < adapter->req_rx_queues; i++) 1737 for (i = 0; i < adapter->req_rx_queues; i++)
1677 napi_schedule(&adapter->napi[i]); 1738 napi_schedule(&adapter->napi[i]);
@@ -1679,6 +1740,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
1679 if (adapter->reset_reason != VNIC_RESET_FAILOVER) 1740 if (adapter->reset_reason != VNIC_RESET_FAILOVER)
1680 netdev_notify_peers(netdev); 1741 netdev_notify_peers(netdev);
1681 1742
1743 netif_carrier_on(netdev);
1744
1682 return 0; 1745 return 0;
1683} 1746}
1684 1747
@@ -1853,6 +1916,7 @@ restart_poll:
1853 be16_to_cpu(next->rx_comp.rc)); 1916 be16_to_cpu(next->rx_comp.rc));
1854 /* free the entry */ 1917 /* free the entry */
1855 next->rx_comp.first = 0; 1918 next->rx_comp.first = 0;
1919 dev_kfree_skb_any(rx_buff->skb);
1856 remove_buff_from_pool(adapter, rx_buff); 1920 remove_buff_from_pool(adapter, rx_buff);
1857 continue; 1921 continue;
1858 } 1922 }
@@ -2244,24 +2308,27 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
2244} 2308}
2245 2309
2246static void release_sub_crq_queue(struct ibmvnic_adapter *adapter, 2310static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
2247 struct ibmvnic_sub_crq_queue *scrq) 2311 struct ibmvnic_sub_crq_queue *scrq,
2312 bool do_h_free)
2248{ 2313{
2249 struct device *dev = &adapter->vdev->dev; 2314 struct device *dev = &adapter->vdev->dev;
2250 long rc; 2315 long rc;
2251 2316
2252 netdev_dbg(adapter->netdev, "Releasing sub-CRQ\n"); 2317 netdev_dbg(adapter->netdev, "Releasing sub-CRQ\n");
2253 2318
2254 /* Close the sub-crqs */ 2319 if (do_h_free) {
2255 do { 2320 /* Close the sub-crqs */
2256 rc = plpar_hcall_norets(H_FREE_SUB_CRQ, 2321 do {
2257 adapter->vdev->unit_address, 2322 rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
2258 scrq->crq_num); 2323 adapter->vdev->unit_address,
2259 } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); 2324 scrq->crq_num);
2325 } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
2260 2326
2261 if (rc) { 2327 if (rc) {
2262 netdev_err(adapter->netdev, 2328 netdev_err(adapter->netdev,
2263 "Failed to release sub-CRQ %16lx, rc = %ld\n", 2329 "Failed to release sub-CRQ %16lx, rc = %ld\n",
2264 scrq->crq_num, rc); 2330 scrq->crq_num, rc);
2331 }
2265 } 2332 }
2266 2333
2267 dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE, 2334 dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
@@ -2329,12 +2396,21 @@ zero_page_failed:
2329 return NULL; 2396 return NULL;
2330} 2397}
2331 2398
2332static void release_sub_crqs(struct ibmvnic_adapter *adapter) 2399static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
2333{ 2400{
2401 u64 num_tx_scrqs, num_rx_scrqs;
2334 int i; 2402 int i;
2335 2403
2404 if (adapter->state == VNIC_PROBED) {
2405 num_tx_scrqs = adapter->req_tx_queues;
2406 num_rx_scrqs = adapter->req_rx_queues;
2407 } else {
2408 num_tx_scrqs = adapter->num_active_tx_scrqs;
2409 num_rx_scrqs = adapter->num_active_rx_scrqs;
2410 }
2411
2336 if (adapter->tx_scrq) { 2412 if (adapter->tx_scrq) {
2337 for (i = 0; i < adapter->req_tx_queues; i++) { 2413 for (i = 0; i < num_tx_scrqs; i++) {
2338 if (!adapter->tx_scrq[i]) 2414 if (!adapter->tx_scrq[i])
2339 continue; 2415 continue;
2340 2416
@@ -2347,7 +2423,8 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
2347 adapter->tx_scrq[i]->irq = 0; 2423 adapter->tx_scrq[i]->irq = 0;
2348 } 2424 }
2349 2425
2350 release_sub_crq_queue(adapter, adapter->tx_scrq[i]); 2426 release_sub_crq_queue(adapter, adapter->tx_scrq[i],
2427 do_h_free);
2351 } 2428 }
2352 2429
2353 kfree(adapter->tx_scrq); 2430 kfree(adapter->tx_scrq);
@@ -2355,7 +2432,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
2355 } 2432 }
2356 2433
2357 if (adapter->rx_scrq) { 2434 if (adapter->rx_scrq) {
2358 for (i = 0; i < adapter->req_rx_queues; i++) { 2435 for (i = 0; i < num_rx_scrqs; i++) {
2359 if (!adapter->rx_scrq[i]) 2436 if (!adapter->rx_scrq[i])
2360 continue; 2437 continue;
2361 2438
@@ -2368,7 +2445,8 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
2368 adapter->rx_scrq[i]->irq = 0; 2445 adapter->rx_scrq[i]->irq = 0;
2369 } 2446 }
2370 2447
2371 release_sub_crq_queue(adapter, adapter->rx_scrq[i]); 2448 release_sub_crq_queue(adapter, adapter->rx_scrq[i],
2449 do_h_free);
2372 } 2450 }
2373 2451
2374 kfree(adapter->rx_scrq); 2452 kfree(adapter->rx_scrq);
@@ -2422,6 +2500,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
2422restart_loop: 2500restart_loop:
2423 while (pending_scrq(adapter, scrq)) { 2501 while (pending_scrq(adapter, scrq)) {
2424 unsigned int pool = scrq->pool_index; 2502 unsigned int pool = scrq->pool_index;
2503 int num_entries = 0;
2425 2504
2426 next = ibmvnic_next_scrq(adapter, scrq); 2505 next = ibmvnic_next_scrq(adapter, scrq);
2427 for (i = 0; i < next->tx_comp.num_comps; i++) { 2506 for (i = 0; i < next->tx_comp.num_comps; i++) {
@@ -2452,6 +2531,8 @@ restart_loop:
2452 txbuff->skb = NULL; 2531 txbuff->skb = NULL;
2453 } 2532 }
2454 2533
2534 num_entries += txbuff->num_entries;
2535
2455 adapter->tx_pool[pool].free_map[adapter->tx_pool[pool]. 2536 adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
2456 producer_index] = index; 2537 producer_index] = index;
2457 adapter->tx_pool[pool].producer_index = 2538 adapter->tx_pool[pool].producer_index =
@@ -2461,7 +2542,7 @@ restart_loop:
2461 /* remove tx_comp scrq*/ 2542 /* remove tx_comp scrq*/
2462 next->tx_comp.first = 0; 2543 next->tx_comp.first = 0;
2463 2544
2464 if (atomic_sub_return(next->tx_comp.num_comps, &scrq->used) <= 2545 if (atomic_sub_return(num_entries, &scrq->used) <=
2465 (adapter->req_tx_entries_per_subcrq / 2) && 2546 (adapter->req_tx_entries_per_subcrq / 2) &&
2466 __netif_subqueue_stopped(adapter->netdev, 2547 __netif_subqueue_stopped(adapter->netdev,
2467 scrq->pool_index)) { 2548 scrq->pool_index)) {
@@ -2539,7 +2620,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
2539 dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n", 2620 dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n",
2540 scrq->irq, rc); 2621 scrq->irq, rc);
2541 irq_dispose_mapping(scrq->irq); 2622 irq_dispose_mapping(scrq->irq);
2542 goto req_rx_irq_failed; 2623 goto req_tx_irq_failed;
2543 } 2624 }
2544 } 2625 }
2545 2626
@@ -2575,7 +2656,7 @@ req_tx_irq_failed:
2575 free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]); 2656 free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
2576 irq_dispose_mapping(adapter->rx_scrq[j]->irq); 2657 irq_dispose_mapping(adapter->rx_scrq[j]->irq);
2577 } 2658 }
2578 release_sub_crqs(adapter); 2659 release_sub_crqs(adapter, 1);
2579 return rc; 2660 return rc;
2580} 2661}
2581 2662
@@ -2657,7 +2738,7 @@ rx_failed:
2657 adapter->tx_scrq = NULL; 2738 adapter->tx_scrq = NULL;
2658tx_failed: 2739tx_failed:
2659 for (i = 0; i < registered_queues; i++) 2740 for (i = 0; i < registered_queues; i++)
2660 release_sub_crq_queue(adapter, allqueues[i]); 2741 release_sub_crq_queue(adapter, allqueues[i], 1);
2661 kfree(allqueues); 2742 kfree(allqueues);
2662 return -1; 2743 return -1;
2663} 2744}
@@ -3013,6 +3094,7 @@ static void send_login(struct ibmvnic_adapter *adapter)
3013 struct vnic_login_client_data *vlcd; 3094 struct vnic_login_client_data *vlcd;
3014 int i; 3095 int i;
3015 3096
3097 release_login_rsp_buffer(adapter);
3016 client_data_len = vnic_client_data_len(adapter); 3098 client_data_len = vnic_client_data_len(adapter);
3017 3099
3018 buffer_size = 3100 buffer_size =
@@ -3708,6 +3790,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
3708 3790
3709 dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz, 3791 dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
3710 DMA_BIDIRECTIONAL); 3792 DMA_BIDIRECTIONAL);
3793 release_login_buffer(adapter);
3711 dma_unmap_single(dev, adapter->login_rsp_buf_token, 3794 dma_unmap_single(dev, adapter->login_rsp_buf_token,
3712 adapter->login_rsp_buf_sz, DMA_BIDIRECTIONAL); 3795 adapter->login_rsp_buf_sz, DMA_BIDIRECTIONAL);
3713 3796
@@ -4282,6 +4365,7 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
4282{ 4365{
4283 struct device *dev = &adapter->vdev->dev; 4366 struct device *dev = &adapter->vdev->dev;
4284 unsigned long timeout = msecs_to_jiffies(30000); 4367 unsigned long timeout = msecs_to_jiffies(30000);
4368 u64 old_num_rx_queues, old_num_tx_queues;
4285 int rc; 4369 int rc;
4286 4370
4287 if (adapter->resetting && !adapter->wait_for_reset) { 4371 if (adapter->resetting && !adapter->wait_for_reset) {
@@ -4299,6 +4383,9 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
4299 4383
4300 adapter->from_passive_init = false; 4384 adapter->from_passive_init = false;
4301 4385
4386 old_num_rx_queues = adapter->req_rx_queues;
4387 old_num_tx_queues = adapter->req_tx_queues;
4388
4302 init_completion(&adapter->init_done); 4389 init_completion(&adapter->init_done);
4303 adapter->init_done_rc = 0; 4390 adapter->init_done_rc = 0;
4304 ibmvnic_send_crq_init(adapter); 4391 ibmvnic_send_crq_init(adapter);
@@ -4318,10 +4405,18 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
4318 return -1; 4405 return -1;
4319 } 4406 }
4320 4407
4321 if (adapter->resetting && !adapter->wait_for_reset) 4408 if (adapter->resetting && !adapter->wait_for_reset) {
4322 rc = reset_sub_crq_queues(adapter); 4409 if (adapter->req_rx_queues != old_num_rx_queues ||
4323 else 4410 adapter->req_tx_queues != old_num_tx_queues) {
4411 release_sub_crqs(adapter, 0);
4412 rc = init_sub_crqs(adapter);
4413 } else {
4414 rc = reset_sub_crq_queues(adapter);
4415 }
4416 } else {
4324 rc = init_sub_crqs(adapter); 4417 rc = init_sub_crqs(adapter);
4418 }
4419
4325 if (rc) { 4420 if (rc) {
4326 dev_err(dev, "Initialization of sub crqs failed\n"); 4421 dev_err(dev, "Initialization of sub crqs failed\n");
4327 release_crq_queue(adapter); 4422 release_crq_queue(adapter);
@@ -4421,7 +4516,7 @@ ibmvnic_register_fail:
4421 device_remove_file(&dev->dev, &dev_attr_failover); 4516 device_remove_file(&dev->dev, &dev_attr_failover);
4422 4517
4423ibmvnic_init_fail: 4518ibmvnic_init_fail:
4424 release_sub_crqs(adapter); 4519 release_sub_crqs(adapter, 1);
4425 release_crq_queue(adapter); 4520 release_crq_queue(adapter);
4426 free_netdev(netdev); 4521 free_netdev(netdev);
4427 4522
@@ -4438,7 +4533,7 @@ static int ibmvnic_remove(struct vio_dev *dev)
4438 mutex_lock(&adapter->reset_lock); 4533 mutex_lock(&adapter->reset_lock);
4439 4534
4440 release_resources(adapter); 4535 release_resources(adapter);
4441 release_sub_crqs(adapter); 4536 release_sub_crqs(adapter, 1);
4442 release_crq_queue(adapter); 4537 release_crq_queue(adapter);
4443 4538
4444 adapter->state = VNIC_REMOVED; 4539 adapter->state = VNIC_REMOVED;
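
[Sketch] Among the ibmvnic changes above, the tx accounting fix is the subtle one: a multi-descriptor skb must charge all of its entries against scrq->used on transmit and credit the same count on completion, or the stop/wake watermarks drift apart. A toy model with stand-in types and a made-up limit:

#include <stdio.h>

struct scrq_stub { int used; int limit; };

static void xmit(struct scrq_stub *q, int num_entries)
{
	q->used += num_entries;              /* was: q->used += 1 per skb */
	if (q->used >= q->limit)
		printf("stop queue (used=%d)\n", q->used);
}

static void complete(struct scrq_stub *q, int num_entries)
{
	q->used -= num_entries;              /* was: per-completion count */
	if (q->used <= q->limit / 2)
		printf("wake queue (used=%d)\n", q->used);
}

int main(void)
{
	struct scrq_stub q = { .used = 0, .limit = 8 };

	xmit(&q, 6);     /* one skb, six descriptors: used=6, no stop */
	xmit(&q, 2);     /* hits the limit -> stop */
	complete(&q, 6); /* freeing the big skb credits all six -> wake */
	return 0;
}
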
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index fe21a6e2ddae..68e712c69211 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -909,6 +909,7 @@ struct ibmvnic_tx_buff {
909 union sub_crq indir_arr[6]; 909 union sub_crq indir_arr[6];
910 u8 hdr_data[140]; 910 u8 hdr_data[140];
911 dma_addr_t indir_dma; 911 dma_addr_t indir_dma;
912 int num_entries;
912}; 913};
913 914
914struct ibmvnic_tx_pool { 915struct ibmvnic_tx_pool {
@@ -1091,8 +1092,8 @@ struct ibmvnic_adapter {
1091 u64 opt_rxba_entries_per_subcrq; 1092 u64 opt_rxba_entries_per_subcrq;
1092 __be64 tx_rx_desc_req; 1093 __be64 tx_rx_desc_req;
1093 u8 map_id; 1094 u8 map_id;
1094 u64 num_active_rx_pools; 1095 u64 num_active_rx_scrqs;
1095 u64 num_active_tx_pools; 1096 u64 num_active_tx_scrqs;
1096 1097
1097 struct tasklet_struct tasklet; 1098 struct tasklet_struct tasklet;
1098 enum vnic_state state; 1099 enum vnic_state state;
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 46e9f4e0a02c..36d9401a6258 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -507,6 +507,7 @@ struct i40e_pf {
507#define I40E_HW_STOP_FW_LLDP BIT(16) 507#define I40E_HW_STOP_FW_LLDP BIT(16)
508#define I40E_HW_PORT_ID_VALID BIT(17) 508#define I40E_HW_PORT_ID_VALID BIT(17)
509#define I40E_HW_RESTART_AUTONEG BIT(18) 509#define I40E_HW_RESTART_AUTONEG BIT(18)
510#define I40E_HW_STOPPABLE_FW_LLDP BIT(19)
510 511
511 u64 flags; 512 u64 flags;
512#define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(0) 513#define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(0)
@@ -824,6 +825,7 @@ struct i40e_q_vector {
824 struct i40e_ring_container rx; 825 struct i40e_ring_container rx;
825 struct i40e_ring_container tx; 826 struct i40e_ring_container tx;
826 827
828 u8 itr_countdown; /* when 0 should adjust adaptive ITR */
827 u8 num_ringpairs; /* total number of ring pairs in vector */ 829 u8 num_ringpairs; /* total number of ring pairs in vector */
828 830
829 cpumask_t affinity_mask; 831 cpumask_t affinity_mask;
@@ -832,8 +834,6 @@ struct i40e_q_vector {
832 struct rcu_head rcu; /* to avoid race with update stats on free */ 834 struct rcu_head rcu; /* to avoid race with update stats on free */
833 char name[I40E_INT_NAME_STR_LEN]; 835 char name[I40E_INT_NAME_STR_LEN];
834 bool arm_wb_state; 836 bool arm_wb_state;
835#define ITR_COUNTDOWN_START 100
836 u8 itr_countdown; /* when 0 should adjust ITR */
837} ____cacheline_internodealigned_in_smp; 837} ____cacheline_internodealigned_in_smp;
838 838
839/* lan device */ 839/* lan device */
@@ -1109,4 +1109,10 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
1109 1109
1110int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); 1110int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
1111int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate); 1111int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
1112int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
1113 struct i40e_cloud_filter *filter,
1114 bool add);
1115int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
1116 struct i40e_cloud_filter *filter,
1117 bool add);
1112#endif /* _I40E_H_ */ 1118#endif /* _I40E_H_ */
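
[Sketch] The i40e header change above moves itr_countdown into the hot part of the q_vector, and the later hunks collapse rx_itr_setting/tx_itr_setting into a single per-ring itr_setting. A sketch of the dynamic-vs-fixed test that setting carries; the bit value here is illustrative, not i40e's actual ITR encoding:

#include <stdbool.h>
#include <stdio.h>

#define ITR_DYNAMIC_BIT 0x8000u /* hypothetical "adaptive" flag bit */

static bool itr_is_dynamic(unsigned int setting)
{
	return setting & ITR_DYNAMIC_BIT;
}

int main(void)
{
	unsigned int rx_itr = ITR_DYNAMIC_BIT | 50; /* adaptive, 50us base */
	unsigned int tx_itr = 84;                   /* fixed 84us */

	printf("rx: %s\n", itr_is_dynamic(rx_itr) ? "dynamic" : "fixed");
	printf("tx: %s\n", itr_is_dynamic(tx_itr) ? "dynamic" : "fixed");
	return 0;
}
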
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 4c3b4243cf65..b829fd365693 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -155,8 +155,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
155 dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n", 155 dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n",
156 (unsigned long int)nd->vlan_features); 156 (unsigned long int)nd->vlan_features);
157 } 157 }
158 dev_info(&pf->pdev->dev, 158 dev_info(&pf->pdev->dev, " active_vlans is %s\n",
159 " vlgrp: & = %p\n", vsi->active_vlans); 159 vsi->active_vlans ? "<valid>" : "<null>");
160 dev_info(&pf->pdev->dev, 160 dev_info(&pf->pdev->dev,
161 " flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n", 161 " flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n",
162 vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags); 162 vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags);
@@ -270,14 +270,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
270 continue; 270 continue;
271 271
272 dev_info(&pf->pdev->dev, 272 dev_info(&pf->pdev->dev,
273 " rx_rings[%i]: desc = %p\n",
274 i, rx_ring->desc);
275 dev_info(&pf->pdev->dev,
276 " rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n",
277 i, rx_ring->dev,
278 rx_ring->netdev,
279 rx_ring->rx_bi);
280 dev_info(&pf->pdev->dev,
281 " rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n", 273 " rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
282 i, *rx_ring->state, 274 i, *rx_ring->state,
283 rx_ring->queue_index, 275 rx_ring->queue_index,
@@ -307,17 +299,12 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
307 rx_ring->rx_stats.realloc_count, 299 rx_ring->rx_stats.realloc_count,
308 rx_ring->rx_stats.page_reuse_count); 300 rx_ring->rx_stats.page_reuse_count);
309 dev_info(&pf->pdev->dev, 301 dev_info(&pf->pdev->dev,
310 " rx_rings[%i]: size = %i, dma = 0x%08lx\n", 302 " rx_rings[%i]: size = %i\n",
311 i, rx_ring->size, 303 i, rx_ring->size);
312 (unsigned long int)rx_ring->dma);
313 dev_info(&pf->pdev->dev,
314 " rx_rings[%i]: vsi = %p, q_vector = %p\n",
315 i, rx_ring->vsi,
316 rx_ring->q_vector);
317 dev_info(&pf->pdev->dev, 304 dev_info(&pf->pdev->dev,
318 " rx_rings[%i]: rx_itr_setting = %d (%s)\n", 305 " rx_rings[%i]: itr_setting = %d (%s)\n",
319 i, rx_ring->rx_itr_setting, 306 i, rx_ring->itr_setting,
320 ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed"); 307 ITR_IS_DYNAMIC(rx_ring->itr_setting) ? "dynamic" : "fixed");
321 } 308 }
322 for (i = 0; i < vsi->num_queue_pairs; i++) { 309 for (i = 0; i < vsi->num_queue_pairs; i++) {
323 struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]); 310 struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]);
@@ -326,14 +313,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
326 continue; 313 continue;
327 314
328 dev_info(&pf->pdev->dev, 315 dev_info(&pf->pdev->dev,
329 " tx_rings[%i]: desc = %p\n",
330 i, tx_ring->desc);
331 dev_info(&pf->pdev->dev,
332 " tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n",
333 i, tx_ring->dev,
334 tx_ring->netdev,
335 tx_ring->tx_bi);
336 dev_info(&pf->pdev->dev,
337 " tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n", 316 " tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
338 i, *tx_ring->state, 317 i, *tx_ring->state,
339 tx_ring->queue_index, 318 tx_ring->queue_index,
@@ -355,20 +334,15 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
355 tx_ring->tx_stats.tx_busy, 334 tx_ring->tx_stats.tx_busy,
356 tx_ring->tx_stats.tx_done_old); 335 tx_ring->tx_stats.tx_done_old);
357 dev_info(&pf->pdev->dev, 336 dev_info(&pf->pdev->dev,
358 " tx_rings[%i]: size = %i, dma = 0x%08lx\n", 337 " tx_rings[%i]: size = %i\n",
359 i, tx_ring->size, 338 i, tx_ring->size);
360 (unsigned long int)tx_ring->dma);
361 dev_info(&pf->pdev->dev,
362 " tx_rings[%i]: vsi = %p, q_vector = %p\n",
363 i, tx_ring->vsi,
364 tx_ring->q_vector);
365 dev_info(&pf->pdev->dev, 339 dev_info(&pf->pdev->dev,
366 " tx_rings[%i]: DCB tc = %d\n", 340 " tx_rings[%i]: DCB tc = %d\n",
367 i, tx_ring->dcb_tc); 341 i, tx_ring->dcb_tc);
368 dev_info(&pf->pdev->dev, 342 dev_info(&pf->pdev->dev,
369 " tx_rings[%i]: tx_itr_setting = %d (%s)\n", 343 " tx_rings[%i]: itr_setting = %d (%s)\n",
370 i, tx_ring->tx_itr_setting, 344 i, tx_ring->itr_setting,
371 ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed"); 345 ITR_IS_DYNAMIC(tx_ring->itr_setting) ? "dynamic" : "fixed");
372 } 346 }
373 rcu_read_unlock(); 347 rcu_read_unlock();
374 dev_info(&pf->pdev->dev, 348 dev_info(&pf->pdev->dev,
@@ -466,8 +440,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
466 vsi->info.resp_reserved[6], vsi->info.resp_reserved[7], 440 vsi->info.resp_reserved[6], vsi->info.resp_reserved[7],
467 vsi->info.resp_reserved[8], vsi->info.resp_reserved[9], 441 vsi->info.resp_reserved[8], vsi->info.resp_reserved[9],
468 vsi->info.resp_reserved[10], vsi->info.resp_reserved[11]); 442 vsi->info.resp_reserved[10], vsi->info.resp_reserved[11]);
469 if (vsi->back)
470 dev_info(&pf->pdev->dev, " PF = %p\n", vsi->back);
471 dev_info(&pf->pdev->dev, " idx = %d\n", vsi->idx); 443 dev_info(&pf->pdev->dev, " idx = %d\n", vsi->idx);
472 dev_info(&pf->pdev->dev, 444 dev_info(&pf->pdev->dev,
473 " tc_config: numtc = %d, enabled_tc = 0x%x\n", 445 " tc_config: numtc = %d, enabled_tc = 0x%x\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 2f5bee713fef..0dcbbda164c4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2244,14 +2244,14 @@ static int __i40e_get_coalesce(struct net_device *netdev,
2244 rx_ring = vsi->rx_rings[queue]; 2244 rx_ring = vsi->rx_rings[queue];
2245 tx_ring = vsi->tx_rings[queue]; 2245 tx_ring = vsi->tx_rings[queue];
2246 2246
2247 if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) 2247 if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
2248 ec->use_adaptive_rx_coalesce = 1; 2248 ec->use_adaptive_rx_coalesce = 1;
2249 2249
2250 if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) 2250 if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
2251 ec->use_adaptive_tx_coalesce = 1; 2251 ec->use_adaptive_tx_coalesce = 1;
2252 2252
2253 ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; 2253 ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
2254 ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; 2254 ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
2255 2255
2256 /* we use the _usecs_high to store/set the interrupt rate limit 2256 /* we use the _usecs_high to store/set the interrupt rate limit
2257 * that the hardware supports, that almost but not quite 2257 * that the hardware supports, that almost but not quite
@@ -2311,34 +2311,35 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
2311 struct i40e_pf *pf = vsi->back; 2311 struct i40e_pf *pf = vsi->back;
2312 struct i40e_hw *hw = &pf->hw; 2312 struct i40e_hw *hw = &pf->hw;
2313 struct i40e_q_vector *q_vector; 2313 struct i40e_q_vector *q_vector;
2314 u16 vector, intrl; 2314 u16 intrl;
2315 2315
2316 intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit); 2316 intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit);
2317 2317
2318 rx_ring->rx_itr_setting = ec->rx_coalesce_usecs; 2318 rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
2319 tx_ring->tx_itr_setting = ec->tx_coalesce_usecs; 2319 tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
2320 2320
2321 if (ec->use_adaptive_rx_coalesce) 2321 if (ec->use_adaptive_rx_coalesce)
2322 rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC; 2322 rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
2323 else 2323 else
2324 rx_ring->rx_itr_setting &= ~I40E_ITR_DYNAMIC; 2324 rx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
2325 2325
2326 if (ec->use_adaptive_tx_coalesce) 2326 if (ec->use_adaptive_tx_coalesce)
2327 tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC; 2327 tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
2328 else 2328 else
2329 tx_ring->tx_itr_setting &= ~I40E_ITR_DYNAMIC; 2329 tx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
2330 2330
2331 q_vector = rx_ring->q_vector; 2331 q_vector = rx_ring->q_vector;
2332 q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); 2332 q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
2333 vector = vsi->base_vector + q_vector->v_idx;
2334 wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
2335 2333
2336 q_vector = tx_ring->q_vector; 2334 q_vector = tx_ring->q_vector;
2337 q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); 2335 q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
2338 vector = vsi->base_vector + q_vector->v_idx;
2339 wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
2340 2336
2341 wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl); 2337 /* The interrupt handler itself will take care of programming
2338 * the Tx and Rx ITR values based on the values we have entered
2339 * into the q_vector, no need to write the values now.
2340 */
2341
2342 wr32(hw, I40E_PFINT_RATEN(q_vector->reg_idx), intrl);
2342 i40e_flush(hw); 2343 i40e_flush(hw);
2343} 2344}
2344 2345
@@ -2364,11 +2365,11 @@ static int __i40e_set_coalesce(struct net_device *netdev,
2364 vsi->work_limit = ec->tx_max_coalesced_frames_irq; 2365 vsi->work_limit = ec->tx_max_coalesced_frames_irq;
2365 2366
2366 if (queue < 0) { 2367 if (queue < 0) {
2367 cur_rx_itr = vsi->rx_rings[0]->rx_itr_setting; 2368 cur_rx_itr = vsi->rx_rings[0]->itr_setting;
2368 cur_tx_itr = vsi->tx_rings[0]->tx_itr_setting; 2369 cur_tx_itr = vsi->tx_rings[0]->itr_setting;
2369 } else if (queue < vsi->num_queue_pairs) { 2370 } else if (queue < vsi->num_queue_pairs) {
2370 cur_rx_itr = vsi->rx_rings[queue]->rx_itr_setting; 2371 cur_rx_itr = vsi->rx_rings[queue]->itr_setting;
2371 cur_tx_itr = vsi->tx_rings[queue]->tx_itr_setting; 2372 cur_tx_itr = vsi->tx_rings[queue]->itr_setting;
2372 } else { 2373 } else {
2373 netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", 2374 netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
2374 vsi->num_queue_pairs - 1); 2375 vsi->num_queue_pairs - 1);
@@ -2396,7 +2397,7 @@ static int __i40e_set_coalesce(struct net_device *netdev,
2396 return -EINVAL; 2397 return -EINVAL;
2397 } 2398 }
2398 2399
2399 if (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)) { 2400 if (ec->rx_coalesce_usecs > I40E_MAX_ITR) {
2400 netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); 2401 netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
2401 return -EINVAL; 2402 return -EINVAL;
2402 } 2403 }
@@ -2407,16 +2408,16 @@ static int __i40e_set_coalesce(struct net_device *netdev,
2407 return -EINVAL; 2408 return -EINVAL;
2408 } 2409 }
2409 2410
2410 if (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)) { 2411 if (ec->tx_coalesce_usecs > I40E_MAX_ITR) {
2411 netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); 2412 netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
2412 return -EINVAL; 2413 return -EINVAL;
2413 } 2414 }
2414 2415
2415 if (ec->use_adaptive_rx_coalesce && !cur_rx_itr) 2416 if (ec->use_adaptive_rx_coalesce && !cur_rx_itr)
2416 ec->rx_coalesce_usecs = I40E_MIN_ITR << 1; 2417 ec->rx_coalesce_usecs = I40E_MIN_ITR;
2417 2418
2418 if (ec->use_adaptive_tx_coalesce && !cur_tx_itr) 2419 if (ec->use_adaptive_tx_coalesce && !cur_tx_itr)
2419 ec->tx_coalesce_usecs = I40E_MIN_ITR << 1; 2420 ec->tx_coalesce_usecs = I40E_MIN_ITR;
2420 2421
2421 intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high); 2422 intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high);
2422 vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg); 2423 vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg);
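The dropped "<< 1" shifts above follow from the macro rework in i40e_txrx.h later in this patch: I40E_MAX_ITR and I40E_MIN_ITR used to hold register values in 2 usec units and now hold microseconds directly. A standalone sketch of the equivalence, using the old and new constants visible in this patch:

#include <stdio.h>

#define OLD_I40E_MAX_ITR 0x0FF0	/* register units, 2 usec each */
#define OLD_I40E_MIN_ITR 0x0001
#define NEW_I40E_MAX_ITR 8160	/* microseconds, per the datasheet */
#define NEW_I40E_MIN_ITR 2

int main(void)
{
	/* Both schemes bound rx-usecs/tx-usecs to the same range. */
	printf("old max usecs: %d\n", OLD_I40E_MAX_ITR << 1);	/* 8160 */
	printf("new max usecs: %d\n", NEW_I40E_MAX_ITR);	/* 8160 */
	printf("old min usecs: %d\n", OLD_I40E_MIN_ITR << 1);	/* 2 */
	printf("new min usecs: %d\n", NEW_I40E_MIN_ITR);	/* 2 */
	return 0;
}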
@@ -4406,6 +4407,8 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
4406 } 4407 }
4407 4408
4408flags_complete: 4409flags_complete:
4410 changed_flags = orig_flags ^ new_flags;
4411
4409 /* Before we finalize any flag changes, we need to perform some 4412 /* Before we finalize any flag changes, we need to perform some
4410 * checks to ensure that the changes are supported and safe. 4413 * checks to ensure that the changes are supported and safe.
4411 */ 4414 */
@@ -4415,21 +4418,17 @@ flags_complete:
4415 !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE)) 4418 !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE))
4416 return -EOPNOTSUPP; 4419 return -EOPNOTSUPP;
4417 4420
4418 /* Disable FW LLDP not supported if NPAR active or if FW 4421 /* If the driver detected FW LLDP was disabled on init, this flag could
 4419 * API version < 1.7 4422 * be set; however, we do not support _changing_ the flag if NPAR is
 4423 * enabled or the FW API version is < 1.7. There are situations where older
 4424 * FW versions/NPAR-enabled PFs could disable LLDP; however, we _must_
4425 * not allow the user to enable/disable LLDP with this flag on
4426 * unsupported FW versions.
4420 */ 4427 */
4421 if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) { 4428 if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
4422 if (pf->hw.func_caps.npar_enable) { 4429 if (!(pf->hw_features & I40E_HW_STOPPABLE_FW_LLDP)) {
4423 dev_warn(&pf->pdev->dev,
4424 "Unable to stop FW LLDP if NPAR active\n");
4425 return -EOPNOTSUPP;
4426 }
4427
4428 if (pf->hw.aq.api_maj_ver < 1 ||
4429 (pf->hw.aq.api_maj_ver == 1 &&
4430 pf->hw.aq.api_min_ver < 7)) {
4431 dev_warn(&pf->pdev->dev, 4430 dev_warn(&pf->pdev->dev,
4432 "FW ver does not support stopping FW LLDP\n"); 4431 "Device does not support changing FW LLDP\n");
4433 return -EOPNOTSUPP; 4432 return -EOPNOTSUPP;
4434 } 4433 }
4435 } 4434 }
@@ -4439,6 +4438,10 @@ flags_complete:
4439 * something else has modified the flags variable since we copied it 4438 * something else has modified the flags variable since we copied it
4440 * originally. We'll just punt with an error and log something in the 4439 * originally. We'll just punt with an error and log something in the
4441 * message buffer. 4440 * message buffer.
4441 *
4442 * This is the point of no return for this function. We need to have
4443 * checked any discrepancies or misconfigurations and returned
4444 * EOPNOTSUPP before updating pf->flags here.
4442 */ 4445 */
4443 if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) { 4446 if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
4444 dev_warn(&pf->pdev->dev, 4447 dev_warn(&pf->pdev->dev,
@@ -4446,8 +4449,6 @@ flags_complete:
4446 return -EAGAIN; 4449 return -EAGAIN;
4447 } 4450 }
4448 4451
4449 changed_flags = orig_flags ^ new_flags;
4450
4451 /* Process any additional changes needed as a result of flag changes. 4452 /* Process any additional changes needed as a result of flag changes.
4452 * The changed_flags value reflects the list of bits that were 4453 * The changed_flags value reflects the list of bits that were
4453 * changed in the code above. 4454 * changed in the code above.
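Computing changed_flags before the capability checks is what lets the LLDP test above key on the bits that actually flipped rather than on the requested value, so an I40E_FLAG_DISABLE_FW_LLDP bit that was already set at init no longer trips the -EOPNOTSUPP path. A standalone illustration of the XOR:

#include <stdio.h>
#include <stdint.h>

#define FLAG_LLDP  (1ULL << 0)	/* hypothetical bit positions */
#define FLAG_OTHER (1ULL << 3)

int main(void)
{
	uint64_t orig_flags = FLAG_LLDP;		/* set since init */
	uint64_t new_flags = FLAG_LLDP | FLAG_OTHER;	/* user toggles OTHER */

	/* XOR yields exactly the bits that differ, in either direction,
	 * so a flag that stays set does not count as a change.
	 */
	uint64_t changed_flags = orig_flags ^ new_flags;

	printf("changed: 0x%llx\n",
	       (unsigned long long)changed_flags);	/* 0x8, not 0x9 */
	return 0;
}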
@@ -4479,6 +4480,12 @@ flags_complete:
4479 } 4480 }
4480 } 4481 }
4481 4482
4483 if ((changed_flags & pf->flags &
4484 I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
4485 (pf->flags & I40E_FLAG_MFP_ENABLED))
4486 dev_warn(&pf->pdev->dev,
4487 "Turning on link-down-on-close flag may affect other partitions\n");
4488
4482 if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { 4489 if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
4483 if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) { 4490 if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) {
4484 struct i40e_dcbx_config *dcbcfg; 4491 struct i40e_dcbx_config *dcbcfg;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index e31adbc75f9c..f6d37456f3b7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -69,12 +69,6 @@ static int i40e_reset(struct i40e_pf *pf);
69static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); 69static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
70static void i40e_fdir_sb_setup(struct i40e_pf *pf); 70static void i40e_fdir_sb_setup(struct i40e_pf *pf);
71static int i40e_veb_get_bw_info(struct i40e_veb *veb); 71static int i40e_veb_get_bw_info(struct i40e_veb *veb);
72static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
73 struct i40e_cloud_filter *filter,
74 bool add);
75static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
76 struct i40e_cloud_filter *filter,
77 bool add);
78static int i40e_get_capabilities(struct i40e_pf *pf, 72static int i40e_get_capabilities(struct i40e_pf *pf,
79 enum i40e_admin_queue_opc list_type); 73 enum i40e_admin_queue_opc list_type);
80 74
@@ -215,8 +209,8 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
215 209
216 if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) { 210 if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) {
217 dev_info(&pf->pdev->dev, 211 dev_info(&pf->pdev->dev,
218 "param err: pile=%p needed=%d id=0x%04x\n", 212 "param err: pile=%s needed=%d id=0x%04x\n",
219 pile, needed, id); 213 pile ? "<valid>" : "<null>", needed, id);
220 return -EINVAL; 214 return -EINVAL;
221 } 215 }
222 216
@@ -1380,14 +1374,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
1380 1374
1381 ether_addr_copy(f->macaddr, macaddr); 1375 ether_addr_copy(f->macaddr, macaddr);
1382 f->vlan = vlan; 1376 f->vlan = vlan;
1383 /* If we're in overflow promisc mode, set the state directly 1377 f->state = I40E_FILTER_NEW;
1384 * to failed, so we don't bother to try sending the filter
1385 * to the hardware.
1386 */
1387 if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state))
1388 f->state = I40E_FILTER_FAILED;
1389 else
1390 f->state = I40E_FILTER_NEW;
1391 INIT_HLIST_NODE(&f->hlist); 1378 INIT_HLIST_NODE(&f->hlist);
1392 1379
1393 key = i40e_addr_to_hkey(macaddr); 1380 key = i40e_addr_to_hkey(macaddr);
@@ -2116,17 +2103,16 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name,
2116 * @list: the list of filters to send to firmware 2103 * @list: the list of filters to send to firmware
2117 * @add_head: Position in the add hlist 2104 * @add_head: Position in the add hlist
2118 * @num_add: the number of filters to add 2105 * @num_add: the number of filters to add
2119 * @promisc_change: set to true on exit if promiscuous mode was forced on
2120 * 2106 *
2121 * Send a request to firmware via AdminQ to add a chunk of filters. Will set 2107 * Send a request to firmware via AdminQ to add a chunk of filters. Will set
2122 * promisc_changed to true if the firmware has run out of space for more 2108 * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of
2123 * filters. 2109 * space for more filters.
2124 */ 2110 */
2125static 2111static
2126void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, 2112void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
2127 struct i40e_aqc_add_macvlan_element_data *list, 2113 struct i40e_aqc_add_macvlan_element_data *list,
2128 struct i40e_new_mac_filter *add_head, 2114 struct i40e_new_mac_filter *add_head,
2129 int num_add, bool *promisc_changed) 2115 int num_add)
2130{ 2116{
2131 struct i40e_hw *hw = &vsi->back->hw; 2117 struct i40e_hw *hw = &vsi->back->hw;
2132 int aq_err, fcnt; 2118 int aq_err, fcnt;
@@ -2136,7 +2122,6 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
2136 fcnt = i40e_update_filter_state(num_add, list, add_head); 2122 fcnt = i40e_update_filter_state(num_add, list, add_head);
2137 2123
2138 if (fcnt != num_add) { 2124 if (fcnt != num_add) {
2139 *promisc_changed = true;
2140 set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); 2125 set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2141 dev_warn(&vsi->back->pdev->dev, 2126 dev_warn(&vsi->back->pdev->dev,
2142 "Error %s adding RX filters on %s, promiscuous mode forced on\n", 2127 "Error %s adding RX filters on %s, promiscuous mode forced on\n",
@@ -2177,11 +2162,13 @@ i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name,
2177 NULL); 2162 NULL);
2178 } 2163 }
2179 2164
2180 if (aq_ret) 2165 if (aq_ret) {
2166 set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2181 dev_warn(&vsi->back->pdev->dev, 2167 dev_warn(&vsi->back->pdev->dev,
2182 "Error %s setting broadcast promiscuous mode on %s\n", 2168 "Error %s, forcing overflow promiscuous on %s\n",
2183 i40e_aq_str(hw, hw->aq.asq_last_status), 2169 i40e_aq_str(hw, hw->aq.asq_last_status),
2184 vsi_name); 2170 vsi_name);
2171 }
2185 2172
2186 return aq_ret; 2173 return aq_ret;
2187} 2174}
@@ -2267,9 +2254,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
2267 struct i40e_mac_filter *f; 2254 struct i40e_mac_filter *f;
2268 struct i40e_new_mac_filter *new, *add_head = NULL; 2255 struct i40e_new_mac_filter *new, *add_head = NULL;
2269 struct i40e_hw *hw = &vsi->back->hw; 2256 struct i40e_hw *hw = &vsi->back->hw;
2257 bool old_overflow, new_overflow;
2270 unsigned int failed_filters = 0; 2258 unsigned int failed_filters = 0;
2271 unsigned int vlan_filters = 0; 2259 unsigned int vlan_filters = 0;
2272 bool promisc_changed = false;
2273 char vsi_name[16] = "PF"; 2260 char vsi_name[16] = "PF";
2274 int filter_list_len = 0; 2261 int filter_list_len = 0;
2275 i40e_status aq_ret = 0; 2262 i40e_status aq_ret = 0;
@@ -2291,6 +2278,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
2291 usleep_range(1000, 2000); 2278 usleep_range(1000, 2000);
2292 pf = vsi->back; 2279 pf = vsi->back;
2293 2280
2281 old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2282
2294 if (vsi->netdev) { 2283 if (vsi->netdev) {
2295 changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags; 2284 changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
2296 vsi->current_netdev_flags = vsi->netdev->flags; 2285 vsi->current_netdev_flags = vsi->netdev->flags;
@@ -2423,12 +2412,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
2423 2412
2424 num_add = 0; 2413 num_add = 0;
2425 hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { 2414 hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) {
2426 if (test_bit(__I40E_VSI_OVERFLOW_PROMISC,
2427 vsi->state)) {
2428 new->state = I40E_FILTER_FAILED;
2429 continue;
2430 }
2431
2432 /* handle broadcast filters by updating the broadcast 2415 /* handle broadcast filters by updating the broadcast
2433 * promiscuous flag instead of adding a MAC filter. 2416 * promiscuous flag instead of adding a MAC filter.
2434 */ 2417 */
@@ -2464,15 +2447,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
2464 /* flush a full buffer */ 2447 /* flush a full buffer */
2465 if (num_add == filter_list_len) { 2448 if (num_add == filter_list_len) {
2466 i40e_aqc_add_filters(vsi, vsi_name, add_list, 2449 i40e_aqc_add_filters(vsi, vsi_name, add_list,
2467 add_head, num_add, 2450 add_head, num_add);
2468 &promisc_changed);
2469 memset(add_list, 0, list_size); 2451 memset(add_list, 0, list_size);
2470 num_add = 0; 2452 num_add = 0;
2471 } 2453 }
2472 } 2454 }
2473 if (num_add) { 2455 if (num_add) {
2474 i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head, 2456 i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head,
2475 num_add, &promisc_changed); 2457 num_add);
2476 } 2458 }
2477 /* Now move all of the filters from the temp add list back to 2459 /* Now move all of the filters from the temp add list back to
2478 * the VSI's list. 2460 * the VSI's list.
@@ -2501,24 +2483,16 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
2501 } 2483 }
2502 spin_unlock_bh(&vsi->mac_filter_hash_lock); 2484 spin_unlock_bh(&vsi->mac_filter_hash_lock);
2503 2485
2504 /* If promiscuous mode has changed, we need to calculate a new
2505 * threshold for when we are safe to exit
2506 */
2507 if (promisc_changed)
2508 vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
2509
2510 /* Check if we are able to exit overflow promiscuous mode. We can 2486 /* Check if we are able to exit overflow promiscuous mode. We can
2511 * safely exit if we didn't just enter, we no longer have any failed 2487 * safely exit if we didn't just enter, we no longer have any failed
2512 * filters, and we have reduced filters below the threshold value. 2488 * filters, and we have reduced filters below the threshold value.
2513 */ 2489 */
2514 if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) && 2490 if (old_overflow && !failed_filters &&
2515 !promisc_changed && !failed_filters && 2491 vsi->active_filters < vsi->promisc_threshold) {
2516 (vsi->active_filters < vsi->promisc_threshold)) {
2517 dev_info(&pf->pdev->dev, 2492 dev_info(&pf->pdev->dev,
2518 "filter logjam cleared on %s, leaving overflow promiscuous mode\n", 2493 "filter logjam cleared on %s, leaving overflow promiscuous mode\n",
2519 vsi_name); 2494 vsi_name);
2520 clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); 2495 clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2521 promisc_changed = true;
2522 vsi->promisc_threshold = 0; 2496 vsi->promisc_threshold = 0;
2523 } 2497 }
2524 2498
@@ -2528,6 +2502,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
2528 goto out; 2502 goto out;
2529 } 2503 }
2530 2504
2505 new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2506
2507 /* If we are entering overflow promiscuous, we need to calculate a new
2508 * threshold for when we are safe to exit
2509 */
2510 if (!old_overflow && new_overflow)
2511 vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
2512
2531 /* check for changes in promiscuous modes */ 2513 /* check for changes in promiscuous modes */
2532 if (changed_flags & IFF_ALLMULTI) { 2514 if (changed_flags & IFF_ALLMULTI) {
2533 bool cur_multipromisc; 2515 bool cur_multipromisc;
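The threshold above is now recomputed only on the false-to-true transition of overflow promiscuous (!old_overflow && new_overflow), i.e. at the moment the firmware first runs out of filter space. A worked example of the 3/4 rule:

#include <stdio.h>

int main(void)
{
	unsigned int active_filters = 100;	/* filters when overflow hit */

	/* vsi->promisc_threshold after entering overflow promiscuous */
	unsigned int promisc_threshold = (active_filters * 3) / 4;

	printf("threshold = %u\n", promisc_threshold);	/* 75 */

	/* A later sync may leave overflow promiscuous once there are no
	 * failed filters and active_filters has dropped below 75.
	 */
	return 0;
}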
@@ -2548,12 +2530,11 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
2548 } 2530 }
2549 } 2531 }
2550 2532
2551 if ((changed_flags & IFF_PROMISC) || promisc_changed) { 2533 if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) {
2552 bool cur_promisc; 2534 bool cur_promisc;
2553 2535
2554 cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) || 2536 cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
2555 test_bit(__I40E_VSI_OVERFLOW_PROMISC, 2537 new_overflow);
2556 vsi->state));
2557 aq_ret = i40e_set_promiscuous(pf, cur_promisc); 2538 aq_ret = i40e_set_promiscuous(pf, cur_promisc);
2558 if (aq_ret) { 2539 if (aq_ret) {
2559 retval = i40e_aq_rc_to_posix(aq_ret, 2540 retval = i40e_aq_rc_to_posix(aq_ret,
@@ -3449,15 +3430,20 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
3449 for (i = 0; i < vsi->num_q_vectors; i++, vector++) { 3430 for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
3450 struct i40e_q_vector *q_vector = vsi->q_vectors[i]; 3431 struct i40e_q_vector *q_vector = vsi->q_vectors[i];
3451 3432
3452 q_vector->itr_countdown = ITR_COUNTDOWN_START; 3433 q_vector->rx.next_update = jiffies + 1;
3453 q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting); 3434 q_vector->rx.target_itr =
3454 q_vector->rx.latency_range = I40E_LOW_LATENCY; 3435 ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
3455 wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), 3436 wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
3456 q_vector->rx.itr); 3437 q_vector->rx.target_itr);
3457 q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting); 3438 q_vector->rx.current_itr = q_vector->rx.target_itr;
3458 q_vector->tx.latency_range = I40E_LOW_LATENCY; 3439
3440 q_vector->tx.next_update = jiffies + 1;
3441 q_vector->tx.target_itr =
3442 ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
3459 wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), 3443 wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
3460 q_vector->tx.itr); 3444 q_vector->tx.target_itr);
3445 q_vector->tx.current_itr = q_vector->tx.target_itr;
3446
3461 wr32(hw, I40E_PFINT_RATEN(vector - 1), 3447 wr32(hw, I40E_PFINT_RATEN(vector - 1),
3462 i40e_intrl_usec_to_reg(vsi->int_rate_limit)); 3448 i40e_intrl_usec_to_reg(vsi->int_rate_limit));
3463 3449
@@ -3558,13 +3544,14 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
3558 u32 val; 3544 u32 val;
3559 3545
3560 /* set the ITR configuration */ 3546 /* set the ITR configuration */
3561 q_vector->itr_countdown = ITR_COUNTDOWN_START; 3547 q_vector->rx.next_update = jiffies + 1;
3562 q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting); 3548 q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
3563 q_vector->rx.latency_range = I40E_LOW_LATENCY; 3549 wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr);
3564 wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr); 3550 q_vector->rx.current_itr = q_vector->rx.target_itr;
3565 q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting); 3551 q_vector->tx.next_update = jiffies + 1;
3566 q_vector->tx.latency_range = I40E_LOW_LATENCY; 3552 q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
3567 wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr); 3553 wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr);
3554 q_vector->tx.current_itr = q_vector->tx.target_itr;
3568 3555
3569 i40e_enable_misc_int_causes(pf); 3556 i40e_enable_misc_int_causes(pf);
3570 3557
@@ -5375,7 +5362,7 @@ out:
5375 * @vsi: VSI to be configured 5362 * @vsi: VSI to be configured
5376 * 5363 *
5377 **/ 5364 **/
5378int i40e_get_link_speed(struct i40e_vsi *vsi) 5365static int i40e_get_link_speed(struct i40e_vsi *vsi)
5379{ 5366{
5380 struct i40e_pf *pf = vsi->back; 5367 struct i40e_pf *pf = vsi->back;
5381 5368
@@ -6848,8 +6835,8 @@ i40e_set_cld_element(struct i40e_cloud_filter *filter,
6848 * Add or delete a cloud filter for a specific flow spec. 6835 * Add or delete a cloud filter for a specific flow spec.
 6849 * Returns 0 if the filter was successfully added. 6836 * Returns 0 if the filter was successfully added.
6850 **/ 6837 **/
6851static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, 6838int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
6852 struct i40e_cloud_filter *filter, bool add) 6839 struct i40e_cloud_filter *filter, bool add)
6853{ 6840{
6854 struct i40e_aqc_cloud_filters_element_data cld_filter; 6841 struct i40e_aqc_cloud_filters_element_data cld_filter;
6855 struct i40e_pf *pf = vsi->back; 6842 struct i40e_pf *pf = vsi->back;
@@ -6915,9 +6902,9 @@ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
6915 * Add or delete a cloud filter for a specific flow spec using big buffer. 6902 * Add or delete a cloud filter for a specific flow spec using big buffer.
 6916 * Returns 0 if the filter was successfully added. 6903 * Returns 0 if the filter was successfully added.
6917 **/ 6904 **/
6918static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, 6905int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
6919 struct i40e_cloud_filter *filter, 6906 struct i40e_cloud_filter *filter,
6920 bool add) 6907 bool add)
6921{ 6908{
6922 struct i40e_aqc_cloud_filters_element_bb cld_filter; 6909 struct i40e_aqc_cloud_filters_element_bb cld_filter;
6923 struct i40e_pf *pf = vsi->back; 6910 struct i40e_pf *pf = vsi->back;
@@ -9215,6 +9202,17 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
9215 } 9202 }
9216 i40e_get_oem_version(&pf->hw); 9203 i40e_get_oem_version(&pf->hw);
9217 9204
9205 if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
9206 ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) ||
9207 hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) {
9208 /* The following delay is necessary for 4.33 firmware and older
 9209 * to recover after an EMP reset. 200 ms should suffice, but we
 9210 * use 300 ms here to be sure that the FW is ready to operate
 9211 * after the reset.
9212 */
9213 mdelay(300);
9214 }
9215
9218 /* re-verify the eeprom if we just had an EMP reset */ 9216 /* re-verify the eeprom if we just had an EMP reset */
9219 if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) 9217 if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state))
9220 i40e_verify_eeprom(pf); 9218 i40e_verify_eeprom(pf);
@@ -9937,18 +9935,17 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi)
9937 9935
9938 mutex_lock(&pf->switch_mutex); 9936 mutex_lock(&pf->switch_mutex);
9939 if (!pf->vsi[vsi->idx]) { 9937 if (!pf->vsi[vsi->idx]) {
9940 dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](%p,type %d)\n", 9938 dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n",
9941 vsi->idx, vsi->idx, vsi, vsi->type); 9939 vsi->idx, vsi->idx, vsi->type);
9942 goto unlock_vsi; 9940 goto unlock_vsi;
9943 } 9941 }
9944 9942
9945 if (pf->vsi[vsi->idx] != vsi) { 9943 if (pf->vsi[vsi->idx] != vsi) {
9946 dev_err(&pf->pdev->dev, 9944 dev_err(&pf->pdev->dev,
9947 "pf->vsi[%d](%p, type %d) != vsi[%d](%p,type %d): no free!\n", 9945 "pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n",
9948 pf->vsi[vsi->idx]->idx, 9946 pf->vsi[vsi->idx]->idx,
9949 pf->vsi[vsi->idx],
9950 pf->vsi[vsi->idx]->type, 9947 pf->vsi[vsi->idx]->type,
9951 vsi->idx, vsi, vsi->type); 9948 vsi->idx, vsi->type);
9952 goto unlock_vsi; 9949 goto unlock_vsi;
9953 } 9950 }
9954 9951
@@ -10018,7 +10015,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
10018 ring->dcb_tc = 0; 10015 ring->dcb_tc = 0;
10019 if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) 10016 if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
10020 ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; 10017 ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
10021 ring->tx_itr_setting = pf->tx_itr_default; 10018 ring->itr_setting = pf->tx_itr_default;
10022 vsi->tx_rings[i] = ring++; 10019 vsi->tx_rings[i] = ring++;
10023 10020
10024 if (!i40e_enabled_xdp_vsi(vsi)) 10021 if (!i40e_enabled_xdp_vsi(vsi))
@@ -10036,7 +10033,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
10036 if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) 10033 if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
10037 ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; 10034 ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
10038 set_ring_xdp(ring); 10035 set_ring_xdp(ring);
10039 ring->tx_itr_setting = pf->tx_itr_default; 10036 ring->itr_setting = pf->tx_itr_default;
10040 vsi->xdp_rings[i] = ring++; 10037 vsi->xdp_rings[i] = ring++;
10041 10038
10042setup_rx: 10039setup_rx:
@@ -10049,7 +10046,7 @@ setup_rx:
10049 ring->count = vsi->num_desc; 10046 ring->count = vsi->num_desc;
10050 ring->size = 0; 10047 ring->size = 0;
10051 ring->dcb_tc = 0; 10048 ring->dcb_tc = 0;
10052 ring->rx_itr_setting = pf->rx_itr_default; 10049 ring->itr_setting = pf->rx_itr_default;
10053 vsi->rx_rings[i] = ring; 10050 vsi->rx_rings[i] = ring;
10054 } 10051 }
10055 10052
@@ -10328,9 +10325,6 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
10328 netif_napi_add(vsi->netdev, &q_vector->napi, 10325 netif_napi_add(vsi->netdev, &q_vector->napi,
10329 i40e_napi_poll, NAPI_POLL_WEIGHT); 10326 i40e_napi_poll, NAPI_POLL_WEIGHT);
10330 10327
10331 q_vector->rx.latency_range = I40E_LOW_LATENCY;
10332 q_vector->tx.latency_range = I40E_LOW_LATENCY;
10333
10334 /* tie q_vector and vsi together */ 10328 /* tie q_vector and vsi together */
10335 vsi->q_vectors[v_idx] = q_vector; 10329 vsi->q_vectors[v_idx] = q_vector;
10336 10330
@@ -11089,6 +11083,16 @@ static int i40e_sw_init(struct i40e_pf *pf)
11089 /* IWARP needs one extra vector for CQP just like MISC.*/ 11083 /* IWARP needs one extra vector for CQP just like MISC.*/
11090 pf->num_iwarp_msix = (int)num_online_cpus() + 1; 11084 pf->num_iwarp_msix = (int)num_online_cpus() + 1;
11091 } 11085 }
11086 /* Stopping the FW LLDP engine is only supported on the
 11087 * XL710 with FW version >= 1.7. Also, stopping the FW LLDP
 11088 * engine is not supported if NPAR is enabled on this
 11089 * part.
11090 */
11091 if (pf->hw.mac.type == I40E_MAC_XL710 &&
11092 !pf->hw.func_caps.npar_enable &&
11093 (pf->hw.aq.api_maj_ver > 1 ||
11094 (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver > 6)))
11095 pf->hw_features |= I40E_HW_STOPPABLE_FW_LLDP;
11092 11096
11093#ifdef CONFIG_PCI_IOV 11097#ifdef CONFIG_PCI_IOV
11094 if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) { 11098 if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) {
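The new hw_features bit condenses the three conditions into a single predicate that the ethtool priv-flags path checks later. A standalone restatement; note that api_min_ver > 6 with api_maj_ver == 1 means API version 1.7 or newer:

#include <stdbool.h>
#include <stdio.h>

/* Hedged sketch of the I40E_HW_STOPPABLE_FW_LLDP condition above. */
static bool fw_lldp_stoppable(bool is_xl710, bool npar_enable,
			      int api_maj, int api_min)
{
	return is_xl710 && !npar_enable &&
	       (api_maj > 1 || (api_maj == 1 && api_min > 6));
}

int main(void)
{
	printf("%d\n", fw_lldp_stoppable(true, false, 1, 7));	/* 1 */
	printf("%d\n", fw_lldp_stoppable(true, false, 1, 6));	/* 0 */
	printf("%d\n", fw_lldp_stoppable(true, true, 1, 7));	/* 0 */
	return 0;
}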
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index e554aa6cf070..1ec9b1d8023d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -995,99 +995,241 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
995 } 995 }
996} 996}
997 997
998static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
999 struct i40e_ring_container *rc)
1000{
1001 return &q_vector->rx == rc;
1002}
1003
1004static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
1005{
1006 unsigned int divisor;
1007
1008 switch (q_vector->vsi->back->hw.phy.link_info.link_speed) {
1009 case I40E_LINK_SPEED_40GB:
1010 divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
1011 break;
1012 case I40E_LINK_SPEED_25GB:
1013 case I40E_LINK_SPEED_20GB:
1014 divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
1015 break;
1016 default:
1017 case I40E_LINK_SPEED_10GB:
1018 divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
1019 break;
1020 case I40E_LINK_SPEED_1GB:
1021 case I40E_LINK_SPEED_100MB:
1022 divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
1023 break;
1024 }
1025
1026 return divisor;
1027}
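The divisor scales the per-step ITR adjustment by link speed: the faster the link, the larger the divisor, so the same byte count produces a smaller delay bump. A standalone sketch; the 16896 input is the 256x-scaled value that adjust_by_size below produces for 380-byte frames:

#include <stdio.h>

#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int avg_wire_size = 16896;

	/* Divisors from i40e_itr_divisor(): MIN_INC * {1024, 512, 256, 32} */
	unsigned int div_40g = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
	unsigned int div_10g = I40E_ITR_ADAPTIVE_MIN_INC * 256;

	printf("40G: +%u usecs\n", DIV_ROUND_UP(avg_wire_size, div_40g) *
	       I40E_ITR_ADAPTIVE_MIN_INC);	/* +18 */
	printf("10G: +%u usecs\n", DIV_ROUND_UP(avg_wire_size, div_10g) *
	       I40E_ITR_ADAPTIVE_MIN_INC);	/* +66 */
	return 0;
}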
1028
998/** 1029/**
999 * i40e_set_new_dynamic_itr - Find new ITR level 1030 * i40e_update_itr - update the dynamic ITR value based on statistics
1031 * @q_vector: structure containing interrupt and ring information
1000 * @rc: structure containing ring performance data 1032 * @rc: structure containing ring performance data
1001 * 1033 *
1002 * Returns true if ITR changed, false if not 1034 * Stores a new ITR value based on packets and byte
1003 * 1035 * counts during the last interrupt. The advantage of per interrupt
1004 * Stores a new ITR value based on packets and byte counts during 1036 * computation is faster updates and more accurate ITR for the current
1005 * the last interrupt. The advantage of per interrupt computation 1037 * traffic pattern. Constants in this function were computed
1006 * is faster updates and more accurate ITR for the current traffic 1038 * based on theoretical maximum wire speed and thresholds were set based
1007 * pattern. Constants in this function were computed based on 1039 * on testing data as well as attempting to minimize response time
1008 * theoretical maximum wire speed and thresholds were set based on
1009 * testing data as well as attempting to minimize response time
1010 * while increasing bulk throughput. 1040 * while increasing bulk throughput.
1011 **/ 1041 **/
1012static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) 1042static void i40e_update_itr(struct i40e_q_vector *q_vector,
1043 struct i40e_ring_container *rc)
1013{ 1044{
1014 enum i40e_latency_range new_latency_range = rc->latency_range; 1045 unsigned int avg_wire_size, packets, bytes, itr;
1015 u32 new_itr = rc->itr; 1046 unsigned long next_update = jiffies;
1016 int bytes_per_usec;
1017 unsigned int usecs, estimated_usecs;
1018 1047
1019 if (rc->total_packets == 0 || !rc->itr) 1048 /* If we don't have any rings just leave ourselves set for maximum
1020 return false; 1049 * possible latency so we take ourselves out of the equation.
1050 */
1051 if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
1052 return;
1021 1053
1022 usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; 1054 /* For Rx we want to push the delay up and default to low latency.
1023 bytes_per_usec = rc->total_bytes / usecs; 1055 * for Tx we want to pull the delay down and default to high latency.
1056 */
1057 itr = i40e_container_is_rx(q_vector, rc) ?
1058 I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
1059 I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
1060
 1061 * If we haven't updated within the last 1 - 2 jiffies we can assume
1062 * that either packets are coming in so slow there hasn't been
1063 * any work, or that there is so much work that NAPI is dealing
1064 * with interrupt moderation and we don't need to do anything.
1065 */
1066 if (time_after(next_update, rc->next_update))
1067 goto clear_counts;
1068
1069 /* If itr_countdown is set it means we programmed an ITR within
1070 * the last 4 interrupt cycles. This has a side effect of us
1071 * potentially firing an early interrupt. In order to work around
1072 * this we need to throw out any data received for a few
1073 * interrupts following the update.
1074 */
1075 if (q_vector->itr_countdown) {
1076 itr = rc->target_itr;
1077 goto clear_counts;
1078 }
1079
1080 packets = rc->total_packets;
1081 bytes = rc->total_bytes;
1024 1082
1025 /* The calculations in this algorithm depend on interrupts actually 1083 if (i40e_container_is_rx(q_vector, rc)) {
1026 * firing at the ITR rate. This may not happen if the packet rate is 1084 /* If Rx there are 1 to 4 packets and bytes are less than
1027 * really low, or if we've been napi polling. Check to make sure 1085 * 9000 assume insufficient data to use bulk rate limiting
1028 * that's not the case before we continue. 1086 * approach unless Tx is already in bulk rate limiting. We
1087 * are likely latency driven.
1088 */
1089 if (packets && packets < 4 && bytes < 9000 &&
1090 (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
1091 itr = I40E_ITR_ADAPTIVE_LATENCY;
1092 goto adjust_by_size;
1093 }
1094 } else if (packets < 4) {
1095 /* If we have Tx and Rx ITR maxed and Tx ITR is running in
1096 * bulk mode and we are receiving 4 or fewer packets just
1097 * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
1098 * that the Rx can relax.
1099 */
1100 if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
1101 (q_vector->rx.target_itr & I40E_ITR_MASK) ==
1102 I40E_ITR_ADAPTIVE_MAX_USECS)
1103 goto clear_counts;
1104 } else if (packets > 32) {
1105 /* If we have processed over 32 packets in a single interrupt
1106 * for Tx assume we need to switch over to "bulk" mode.
1107 */
1108 rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
1109 }
1110
1111 /* We have no packets to actually measure against. This means
1112 * either one of the other queues on this vector is active or
1113 * we are a Tx queue doing TSO with too high of an interrupt rate.
1114 *
1115 * Between 4 and 56 we can assume that our current interrupt delay
1116 * is only slightly too low. As such we should increase it by a small
1117 * fixed amount.
1029 */ 1118 */
1030 estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update); 1119 if (packets < 56) {
1031 if (estimated_usecs > usecs) { 1120 itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
1032 new_latency_range = I40E_LOW_LATENCY; 1121 if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
1033 goto reset_latency; 1122 itr &= I40E_ITR_ADAPTIVE_LATENCY;
1123 itr += I40E_ITR_ADAPTIVE_MAX_USECS;
1124 }
1125 goto clear_counts;
1034 } 1126 }
1035 1127
1036 /* simple throttlerate management 1128 if (packets <= 256) {
1037 * 0-10MB/s lowest (50000 ints/s) 1129 itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
1038 * 10-20MB/s low (20000 ints/s) 1130 itr &= I40E_ITR_MASK;
1039 * 20-1249MB/s bulk (18000 ints/s) 1131
1132 /* Between 56 and 112 is our "goldilocks" zone where we are
1133 * working out "just right". Just report that our current
1134 * ITR is good for us.
1135 */
1136 if (packets <= 112)
1137 goto clear_counts;
1138
1139 /* If packet count is 128 or greater we are likely looking
1140 * at a slight overrun of the delay we want. Try halving
1141 * our delay to see if that will cut the number of packets
1142 * in half per interrupt.
1143 */
1144 itr /= 2;
1145 itr &= I40E_ITR_MASK;
1146 if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
1147 itr = I40E_ITR_ADAPTIVE_MIN_USECS;
1148
1149 goto clear_counts;
1150 }
1151
1152 /* The paths below assume we are dealing with a bulk ITR since
1153 * number of packets is greater than 256. We are just going to have
1154 * to compute a value and try to bring the count under control,
1155 * though for smaller packet sizes there isn't much we can do as
1156 * NAPI polling will likely be kicking in sooner rather than later.
1157 */
1158 itr = I40E_ITR_ADAPTIVE_BULK;
1159
1160adjust_by_size:
1161 /* If packet counts are 256 or greater we can assume we have a gross
1162 * overestimation of what the rate should be. Instead of trying to fine
 1163 * tune it, just use the formula below to try and dial in an exact value
 1164 * given the current packet size of the frame.
1165 */
1166 avg_wire_size = bytes / packets;
1167
1168 /* The following is a crude approximation of:
1169 * wmem_default / (size + overhead) = desired_pkts_per_int
1170 * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
1171 * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
1040 * 1172 *
1041 * The math works out because the divisor is in 10^(-6) which 1173 * Assuming wmem_default is 212992 and overhead is 640 bytes per
1042 * turns the bytes/us input value into MB/s values, but 1174 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
1043 * make sure to use usecs, as the register values written 1175 * formula down to
1044 * are in 2 usec increments in the ITR registers, and make sure 1176 *
1045 * to use the smoothed values that the countdown timer gives us. 1177 * (170 * (size + 24)) / (size + 640) = ITR
1178 *
1179 * We first do some math on the packet size and then finally bitshift
1180 * by 8 after rounding up. We also have to account for PCIe link speed
1181 * difference as ITR scales based on this.
1046 */ 1182 */
1047 switch (new_latency_range) { 1183 if (avg_wire_size <= 60) {
1048 case I40E_LOWEST_LATENCY: 1184 /* Start at 250k ints/sec */
1049 if (bytes_per_usec > 10) 1185 avg_wire_size = 4096;
1050 new_latency_range = I40E_LOW_LATENCY; 1186 } else if (avg_wire_size <= 380) {
1051 break; 1187 /* 250K ints/sec to 60K ints/sec */
1052 case I40E_LOW_LATENCY: 1188 avg_wire_size *= 40;
1053 if (bytes_per_usec > 20) 1189 avg_wire_size += 1696;
1054 new_latency_range = I40E_BULK_LATENCY; 1190 } else if (avg_wire_size <= 1084) {
1055 else if (bytes_per_usec <= 10) 1191 /* 60K ints/sec to 36K ints/sec */
1056 new_latency_range = I40E_LOWEST_LATENCY; 1192 avg_wire_size *= 15;
1057 break; 1193 avg_wire_size += 11452;
1058 case I40E_BULK_LATENCY: 1194 } else if (avg_wire_size <= 1980) {
1059 default: 1195 /* 36K ints/sec to 30K ints/sec */
1060 if (bytes_per_usec <= 20) 1196 avg_wire_size *= 5;
1061 new_latency_range = I40E_LOW_LATENCY; 1197 avg_wire_size += 22420;
1062 break; 1198 } else {
1199 /* plateau at a limit of 30K ints/sec */
1200 avg_wire_size = 32256;
1063 } 1201 }
1064 1202
1065reset_latency: 1203 /* If we are in low latency mode halve our delay which doubles the
1066 rc->latency_range = new_latency_range; 1204 * rate to somewhere between 100K to 16K ints/sec
1205 */
1206 if (itr & I40E_ITR_ADAPTIVE_LATENCY)
1207 avg_wire_size /= 2;
1067 1208
1068 switch (new_latency_range) { 1209 /* Resultant value is 256 times larger than it needs to be. This
1069 case I40E_LOWEST_LATENCY: 1210 * gives us room to adjust the value as needed to either increase
1070 new_itr = I40E_ITR_50K; 1211 * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
1071 break; 1212 *
1072 case I40E_LOW_LATENCY: 1213 * Use addition as we have already recorded the new latency flag
1073 new_itr = I40E_ITR_20K; 1214 * for the ITR value.
1074 break; 1215 */
1075 case I40E_BULK_LATENCY: 1216 itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
1076 new_itr = I40E_ITR_18K; 1217 I40E_ITR_ADAPTIVE_MIN_INC;
1077 break; 1218
1078 default: 1219 if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
1079 break; 1220 itr &= I40E_ITR_ADAPTIVE_LATENCY;
1221 itr += I40E_ITR_ADAPTIVE_MAX_USECS;
1080 } 1222 }
1081 1223
1224clear_counts:
1225 /* write back value */
1226 rc->target_itr = itr;
1227
1228 /* next update should occur within next jiffy */
1229 rc->next_update = next_update + 1;
1230
1082 rc->total_bytes = 0; 1231 rc->total_bytes = 0;
1083 rc->total_packets = 0; 1232 rc->total_packets = 0;
1084 rc->last_itr_update = jiffies;
1085
1086 if (new_itr != rc->itr) {
1087 rc->itr = new_itr;
1088 return true;
1089 }
1090 return false;
1091} 1233}
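The piecewise constants in adjust_by_size track the comment's target formula closely; here is a standalone check for a 380-byte average frame, with both values carrying the 256x scaling the code keeps until the final divide:

#include <stdio.h>

static unsigned int piecewise(unsigned int size)
{
	if (size <= 60)
		return 4096;			/* ~250K ints/sec floor */
	if (size <= 380)
		return size * 40 + 1696;	/* 250K..60K ints/sec */
	if (size <= 1084)
		return size * 15 + 11452;	/* 60K..36K ints/sec */
	if (size <= 1980)
		return size * 5 + 22420;	/* 36K..30K ints/sec */
	return 32256;				/* 30K ints/sec plateau */
}

int main(void)
{
	unsigned int size = 380;	/* average wire size in bytes */
	unsigned int exact = 256 * 170 * (size + 24) / (size + 640);

	printf("exact %u vs piecewise %u\n", exact, piecewise(size));
	/* prints: exact 17237 vs piecewise 16896 */
	return 0;
}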
1092 1234
1093/** 1235/**
@@ -1991,7 +2133,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
1991 * @rx_buffer: rx buffer to pull data from 2133 * @rx_buffer: rx buffer to pull data from
1992 * 2134 *
1993 * This function will clean up the contents of the rx_buffer. It will 2135 * This function will clean up the contents of the rx_buffer. It will
1994 * either recycle the bufer or unmap it and free the associated resources. 2136 * either recycle the buffer or unmap it and free the associated resources.
1995 */ 2137 */
1996static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, 2138static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
1997 struct i40e_rx_buffer *rx_buffer) 2139 struct i40e_rx_buffer *rx_buffer)
@@ -2274,29 +2416,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
2274 return failure ? budget : (int)total_rx_packets; 2416 return failure ? budget : (int)total_rx_packets;
2275} 2417}
2276 2418
2277static u32 i40e_buildreg_itr(const int type, const u16 itr) 2419static inline u32 i40e_buildreg_itr(const int type, u16 itr)
2278{ 2420{
2279 u32 val; 2421 u32 val;
2280 2422
2423 /* We don't bother with setting the CLEARPBA bit as the data sheet
2424 * points out doing so is "meaningless since it was already
2425 * auto-cleared". The auto-clearing happens when the interrupt is
2426 * asserted.
2427 *
 2428 * Hardware errata 28 also indicates that writing to a
2429 * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
2430 * an event in the PBA anyway so we need to rely on the automask
 2431 * to hold pending events for us until the interrupt is re-enabled.
2432 *
2433 * The itr value is reported in microseconds, and the register
2434 * value is recorded in 2 microsecond units. For this reason we
2435 * only need to shift by the interval shift - 1 instead of the
2436 * full value.
2437 */
2438 itr &= I40E_ITR_MASK;
2439
2281 val = I40E_PFINT_DYN_CTLN_INTENA_MASK | 2440 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
2282 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
2283 (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | 2441 (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
2284 (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); 2442 (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
2285 2443
2286 return val; 2444 return val;
2287} 2445}
2288 2446
2289/* a small macro to shorten up some long lines */ 2447/* a small macro to shorten up some long lines */
2290#define INTREG I40E_PFINT_DYN_CTLN 2448#define INTREG I40E_PFINT_DYN_CTLN
2291static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
2292{
2293 return vsi->rx_rings[idx]->rx_itr_setting;
2294}
2295 2449
2296static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) 2450/* The act of updating the ITR will cause it to immediately trigger. In order
2297{ 2451 * to prevent this from throwing off adaptive update statistics we defer the
2298 return vsi->tx_rings[idx]->tx_itr_setting; 2452 * update so that it can only happen so often. So after either Tx or Rx are
2299} 2453 * updated we make the adaptive scheme wait until either the ITR completely
2454 * expires via the next_update expiration or we have been through at least
2455 * 3 interrupts.
2456 */
2457#define ITR_COUNTDOWN_START 3
2300 2458
2301/** 2459/**
2302 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt 2460 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -2308,10 +2466,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
2308 struct i40e_q_vector *q_vector) 2466 struct i40e_q_vector *q_vector)
2309{ 2467{
2310 struct i40e_hw *hw = &vsi->back->hw; 2468 struct i40e_hw *hw = &vsi->back->hw;
2311 bool rx = false, tx = false; 2469 u32 intval;
2312 u32 rxval, txval;
2313 int idx = q_vector->v_idx;
2314 int rx_itr_setting, tx_itr_setting;
2315 2470
2316 /* If we don't have MSIX, then we only need to re-enable icr0 */ 2471 /* If we don't have MSIX, then we only need to re-enable icr0 */
2317 if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { 2472 if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
@@ -2319,65 +2474,49 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
2319 return; 2474 return;
2320 } 2475 }
2321 2476
2322 /* avoid dynamic calculation if in countdown mode OR if 2477 /* These will do nothing if dynamic updates are not enabled */
2323 * all dynamic is disabled 2478 i40e_update_itr(q_vector, &q_vector->tx);
2324 */ 2479 i40e_update_itr(q_vector, &q_vector->rx);
2325 rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
2326
2327 rx_itr_setting = get_rx_itr(vsi, idx);
2328 tx_itr_setting = get_tx_itr(vsi, idx);
2329
2330 if (q_vector->itr_countdown > 0 ||
2331 (!ITR_IS_DYNAMIC(rx_itr_setting) &&
2332 !ITR_IS_DYNAMIC(tx_itr_setting))) {
2333 goto enable_int;
2334 }
2335
2336 if (ITR_IS_DYNAMIC(rx_itr_setting)) {
2337 rx = i40e_set_new_dynamic_itr(&q_vector->rx);
2338 rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
2339 }
2340
2341 if (ITR_IS_DYNAMIC(tx_itr_setting)) {
2342 tx = i40e_set_new_dynamic_itr(&q_vector->tx);
2343 txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
2344 }
2345 2480
2346 if (rx || tx) { 2481 /* This block of logic allows us to get away with only updating
2347 /* get the higher of the two ITR adjustments and 2482 * one ITR value with each interrupt. The idea is to perform a
2348 * use the same value for both ITR registers 2483 * pseudo-lazy update with the following criteria.
2349 * when in adaptive mode (Rx and/or Tx) 2484 *
2350 */ 2485 * 1. Rx is given higher priority than Tx if both are in same state
2351 u16 itr = max(q_vector->tx.itr, q_vector->rx.itr); 2486 * 2. If we must reduce an ITR that is given highest priority.
2352 2487 * 3. We then give priority to increasing ITR based on amount.
2353 q_vector->tx.itr = q_vector->rx.itr = itr;
2354 txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
2355 tx = true;
2356 rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
2357 rx = true;
2358 }
2359
2360 /* only need to enable the interrupt once, but need
2361 * to possibly update both ITR values
2362 */ 2488 */
2363 if (rx) { 2489 if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
2364 /* set the INTENA_MSK_MASK so that this first write 2490 /* Rx ITR needs to be reduced, this is highest priority */
2365 * won't actually enable the interrupt, instead just 2491 intval = i40e_buildreg_itr(I40E_RX_ITR,
2366 * updating the ITR (it's bit 31 PF and VF) 2492 q_vector->rx.target_itr);
2493 q_vector->rx.current_itr = q_vector->rx.target_itr;
2494 q_vector->itr_countdown = ITR_COUNTDOWN_START;
2495 } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
2496 ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
2497 (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
2498 /* Tx ITR needs to be reduced, this is second priority
2499 * Tx ITR needs to be increased more than Rx, fourth priority
2367 */ 2500 */
2368 rxval |= BIT(31); 2501 intval = i40e_buildreg_itr(I40E_TX_ITR,
2369 /* don't check _DOWN because interrupt isn't being enabled */ 2502 q_vector->tx.target_itr);
2370 wr32(hw, INTREG(q_vector->reg_idx), rxval); 2503 q_vector->tx.current_itr = q_vector->tx.target_itr;
2504 q_vector->itr_countdown = ITR_COUNTDOWN_START;
2505 } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
2506 /* Rx ITR needs to be increased, third priority */
2507 intval = i40e_buildreg_itr(I40E_RX_ITR,
2508 q_vector->rx.target_itr);
2509 q_vector->rx.current_itr = q_vector->rx.target_itr;
2510 q_vector->itr_countdown = ITR_COUNTDOWN_START;
2511 } else {
2512 /* No ITR update, lowest priority */
2513 intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
2514 if (q_vector->itr_countdown)
2515 q_vector->itr_countdown--;
2371 } 2516 }
2372 2517
2373enable_int:
2374 if (!test_bit(__I40E_VSI_DOWN, vsi->state)) 2518 if (!test_bit(__I40E_VSI_DOWN, vsi->state))
2375 wr32(hw, INTREG(q_vector->reg_idx), txval); 2519 wr32(hw, INTREG(q_vector->reg_idx), intval);
2376
2377 if (q_vector->itr_countdown)
2378 q_vector->itr_countdown--;
2379 else
2380 q_vector->itr_countdown = ITR_COUNTDOWN_START;
2381} 2520}
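Since only one ITR register write happens per interrupt here, the branches above form a strict priority ladder. A standalone restatement of the selection logic (the unsigned wrap-around on the increase comparison is inherited from the original):

#include <stdio.h>
#include <stdint.h>

struct itr_rc {
	uint16_t current_itr;
	uint16_t target_itr;
};

static const char *pick_update(const struct itr_rc *rx,
			       const struct itr_rc *tx)
{
	if (rx->target_itr < rx->current_itr)
		return "reduce Rx";	/* highest priority */
	if (tx->target_itr < tx->current_itr ||
	    (uint16_t)(rx->target_itr - rx->current_itr) <
	    (uint16_t)(tx->target_itr - tx->current_itr))
		return "update Tx";	/* reduction, or the bigger increase */
	if (rx->current_itr != rx->target_itr)
		return "increase Rx";	/* third priority */
	return "no ITR update";		/* just re-enable the interrupt */
}

int main(void)
{
	struct itr_rc rx = { .current_itr = 50, .target_itr = 20 };
	struct itr_rc tx = { .current_itr = 50, .target_itr = 62 };

	/* Rx wants to go down, so it wins even though Tx also changed. */
	printf("%s\n", pick_update(&rx, &tx));	/* reduce Rx */
	return 0;
}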
2382 2521
2383/** 2522/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 701b708628b0..f75a8fe68fcf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -30,32 +30,37 @@
30#include <net/xdp.h> 30#include <net/xdp.h>
31 31
32/* Interrupt Throttling and Rate Limiting Goodies */ 32/* Interrupt Throttling and Rate Limiting Goodies */
33
34#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */
35#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */
36#define I40E_ITR_100K 0x0005
37#define I40E_ITR_50K 0x000A
38#define I40E_ITR_20K 0x0019
39#define I40E_ITR_18K 0x001B
40#define I40E_ITR_8K 0x003E
41#define I40E_ITR_4K 0x007A
42#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
43#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
44 I40E_ITR_DYNAMIC)
45#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
46 I40E_ITR_DYNAMIC)
47#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
48#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */
49#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */
50#define I40E_DEFAULT_IRQ_WORK 256 33#define I40E_DEFAULT_IRQ_WORK 256
51#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1) 34
 52#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC)) 35/* The datasheet for the X710 and XL710 indicates that the maximum value for
 53#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1) 36 * the ITR is 8160 usec, which is then called out as 0xFF0 with a 2 usec
 37 * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
 38 * the register value, which is divided by 2, let's use the actual values and
39 * avoid an excessive amount of translation.
40 */
41#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
42#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */
43#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */
44#define I40E_ITR_100K 10 /* all values below must be even */
45#define I40E_ITR_50K 20
46#define I40E_ITR_20K 50
47#define I40E_ITR_18K 60
48#define I40E_ITR_8K 122
49#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */
50#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
51#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
52#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
53
54#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
55#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
56
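With settings kept in microseconds, ITR_TO_REG only strips the dynamic flag and ITR_REG_ALIGN rounds user input up to the 2 usec register granularity. A standalone sketch using a local stand-in for the kernel's __ALIGN_MASK():

#include <stdio.h>

#define I40E_ITR_DYNAMIC 0x8000
#define I40E_ITR_MASK    0x1FFE

/* Userspace stand-ins for the kernel macros in this hunk. */
#define ALIGN_MASK(x, mask)     (((x) + (mask)) & ~(mask))
#define ITR_TO_REG(setting)     ((setting) & ~I40E_ITR_DYNAMIC)
#define ITR_REG_ALIGN(setting)  ALIGN_MASK(setting, ~I40E_ITR_MASK)
#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))

int main(void)
{
	/* A setting is now usecs plus the dynamic flag bit. */
	unsigned int setting = 50 | I40E_ITR_DYNAMIC;	/* I40E_ITR_RX_DEF */

	printf("dynamic: %d\n", ITR_IS_DYNAMIC(setting));	/* 1 */
	printf("usecs:   %u\n", ITR_TO_REG(setting));		/* 50 */

	/* User input is rounded up to the 2 usec register granularity
	 * and clipped to the 0x1FFE mask before being stored.
	 */
	printf("aligned: %u\n", ITR_REG_ALIGN(25u));	/* 26 */
	return 0;
}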
54/* 0x40 is the enable bit for interrupt rate limiting, and must be set if 57/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
55 * the value of the rate limit is non-zero 58 * the value of the rate limit is non-zero
56 */ 59 */
57#define INTRL_ENA BIT(6) 60#define INTRL_ENA BIT(6)
61#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
58#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2) 62#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
63
59/** 64/**
60 * i40e_intrl_usec_to_reg - convert interrupt rate limit to register 65 * i40e_intrl_usec_to_reg - convert interrupt rate limit to register
61 * @intrl: interrupt rate limit to convert 66 * @intrl: interrupt rate limit to convert
@@ -382,8 +387,7 @@ struct i40e_ring {
382 * these values always store the USER setting, and must be converted 387 * these values always store the USER setting, and must be converted
383 * before programming to a register. 388 * before programming to a register.
384 */ 389 */
385 u16 rx_itr_setting; 390 u16 itr_setting;
386 u16 tx_itr_setting;
387 391
388 u16 count; /* Number of descriptors */ 392 u16 count; /* Number of descriptors */
389 u16 reg_idx; /* HW register index of the ring */ 393 u16 reg_idx; /* HW register index of the ring */
@@ -459,21 +463,21 @@ static inline void set_ring_xdp(struct i40e_ring *ring)
459 ring->flags |= I40E_TXR_FLAGS_XDP; 463 ring->flags |= I40E_TXR_FLAGS_XDP;
460} 464}
461 465
462enum i40e_latency_range { 466#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002
463 I40E_LOWEST_LATENCY = 0, 467#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002
464 I40E_LOW_LATENCY = 1, 468#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e
465 I40E_BULK_LATENCY = 2, 469#define I40E_ITR_ADAPTIVE_LATENCY 0x8000
466}; 470#define I40E_ITR_ADAPTIVE_BULK 0x0000
471#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
467 472
468struct i40e_ring_container { 473struct i40e_ring_container {
469 /* array of pointers to rings */ 474 struct i40e_ring *ring; /* pointer to linked list of ring(s) */
470 struct i40e_ring *ring; 475 unsigned long next_update; /* jiffies value of next update */
471 unsigned int total_bytes; /* total bytes processed this int */ 476 unsigned int total_bytes; /* total bytes processed this int */
472 unsigned int total_packets; /* total packets processed this int */ 477 unsigned int total_packets; /* total packets processed this int */
473 unsigned long last_itr_update; /* jiffies of last ITR update */
474 u16 count; 478 u16 count;
475 enum i40e_latency_range latency_range; 479 u16 target_itr; /* target ITR setting for ring(s) */
476 u16 itr; 480 u16 current_itr; /* current ITR setting for ring(s) */
477}; 481};
478 482
479/* iterator for handling rings in ring container */ 483/* iterator for handling rings in ring container */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index cd294e6a8587..b0eed8c0b2f2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -39,7 +39,7 @@
39#define I40E_MASK(mask, shift) ((u32)(mask) << (shift)) 39#define I40E_MASK(mask, shift) ((u32)(mask) << (shift))
40 40
41#define I40E_MAX_VSI_QP 16 41#define I40E_MAX_VSI_QP 16
42#define I40E_MAX_VF_VSI 3 42#define I40E_MAX_VF_VSI 4
43#define I40E_MAX_CHAINED_RX_BUFFERS 5 43#define I40E_MAX_CHAINED_RX_BUFFERS 5
44#define I40E_MAX_PF_UDP_OFFLOAD_PORTS 16 44#define I40E_MAX_PF_UDP_OFFLOAD_PORTS 16
45 45
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index e9309fb9084b..5cca083da93c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -258,6 +258,38 @@ static u16 i40e_vc_get_pf_queue_id(struct i40e_vf *vf, u16 vsi_id,
258} 258}
259 259
260/** 260/**
261 * i40e_get_real_pf_qid
262 * @vf: pointer to the VF info
263 * @vsi_id: VSI id
264 * @queue_id: queue number
265 *
266 * wrapper around i40e_vc_get_pf_queue_id() that also handles the ADq case
267 **/
268static u16 i40e_get_real_pf_qid(struct i40e_vf *vf, u16 vsi_id, u16 queue_id)
269{
270 int i;
271
272 if (vf->adq_enabled) {
273 /* Although the VF considers all of its queues (1 to 16) as its
274 * own, they may actually belong to different VSIs (up to 4).
275 * We need to find which queue belongs to which VSI.
276 */
277 for (i = 0; i < vf->num_tc; i++) {
278 if (queue_id < vf->ch[i].num_qps) {
279 vsi_id = vf->ch[i].vsi_id;
280 break;
281 }
282 /* find right queue id which is relative to a
283 * given VSI.
284 */
285 queue_id -= vf->ch[i].num_qps;
286 }
287 }
288
289 return i40e_vc_get_pf_queue_id(vf, vsi_id, queue_id);
290}
291
292/**
261 * i40e_config_irq_link_list 293 * i40e_config_irq_link_list
262 * @vf: pointer to the VF info 294 * @vf: pointer to the VF info
263 * @vsi_id: id of VSI as given by the FW 295 * @vsi_id: id of VSI as given by the FW
@@ -310,7 +342,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
310 342
311 vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; 343 vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
312 qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; 344 qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
313 pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); 345 pf_queue_id = i40e_get_real_pf_qid(vf, vsi_id, vsi_queue_id);
314 reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id); 346 reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id);
315 347
316 wr32(hw, reg_idx, reg); 348 wr32(hw, reg_idx, reg);
@@ -333,8 +365,9 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
333 if (next_q < size) { 365 if (next_q < size) {
334 vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; 366 vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
335 qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; 367 qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
336 pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, 368 pf_queue_id = i40e_get_real_pf_qid(vf,
337 vsi_queue_id); 369 vsi_id,
370 vsi_queue_id);
338 } else { 371 } else {
339 pf_queue_id = I40E_QUEUE_END_OF_LIST; 372 pf_queue_id = I40E_QUEUE_END_OF_LIST;
340 qtype = 0; 373 qtype = 0;
@@ -669,18 +702,20 @@ error_param:
669/** 702/**
670 * i40e_alloc_vsi_res 703 * i40e_alloc_vsi_res
671 * @vf: pointer to the VF info 704 * @vf: pointer to the VF info
672 * @type: type of VSI to allocate 705 * @idx: VSI index, applies only for ADq mode, zero otherwise
673 * 706 *
674 * alloc VF vsi context & resources 707 * alloc VF vsi context & resources
675 **/ 708 **/
676static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) 709static int i40e_alloc_vsi_res(struct i40e_vf *vf, u8 idx)
677{ 710{
678 struct i40e_mac_filter *f = NULL; 711 struct i40e_mac_filter *f = NULL;
679 struct i40e_pf *pf = vf->pf; 712 struct i40e_pf *pf = vf->pf;
680 struct i40e_vsi *vsi; 713 struct i40e_vsi *vsi;
714 u64 max_tx_rate = 0;
681 int ret = 0; 715 int ret = 0;
682 716
683 vsi = i40e_vsi_setup(pf, type, pf->vsi[pf->lan_vsi]->seid, vf->vf_id); 717 vsi = i40e_vsi_setup(pf, I40E_VSI_SRIOV, pf->vsi[pf->lan_vsi]->seid,
718 vf->vf_id);
684 719
685 if (!vsi) { 720 if (!vsi) {
686 dev_err(&pf->pdev->dev, 721 dev_err(&pf->pdev->dev,
@@ -689,7 +724,8 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
689 ret = -ENOENT; 724 ret = -ENOENT;
690 goto error_alloc_vsi_res; 725 goto error_alloc_vsi_res;
691 } 726 }
692 if (type == I40E_VSI_SRIOV) { 727
728 if (!idx) {
693 u64 hena = i40e_pf_get_default_rss_hena(pf); 729 u64 hena = i40e_pf_get_default_rss_hena(pf);
694 u8 broadcast[ETH_ALEN]; 730 u8 broadcast[ETH_ALEN];
695 731
@@ -721,17 +757,29 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
721 spin_unlock_bh(&vsi->mac_filter_hash_lock); 757 spin_unlock_bh(&vsi->mac_filter_hash_lock);
722 wr32(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)hena); 758 wr32(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)hena);
723 wr32(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id), (u32)(hena >> 32)); 759 wr32(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id), (u32)(hena >> 32));
760 /* program mac filter only for VF VSI */
761 ret = i40e_sync_vsi_filters(vsi);
762 if (ret)
763 dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
724 } 764 }
725 765
726 /* program mac filter */ 766 /* store the VSI index and id for ADq; don't apply the MAC filter here */
727 ret = i40e_sync_vsi_filters(vsi); 767 if (vf->adq_enabled) {
728 if (ret) 768 vf->ch[idx].vsi_idx = vsi->idx;
729 dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); 769 vf->ch[idx].vsi_id = vsi->id;
770 }
730 771
731 /* Set VF bandwidth if specified */ 772 /* Set VF bandwidth if specified */
732 if (vf->tx_rate) { 773 if (vf->tx_rate) {
774 max_tx_rate = vf->tx_rate;
775 } else if (vf->ch[idx].max_tx_rate) {
776 max_tx_rate = vf->ch[idx].max_tx_rate;
777 }
778
779 if (max_tx_rate) {
780 max_tx_rate = div_u64(max_tx_rate, I40E_BW_CREDIT_DIVISOR);
733 ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, 781 ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid,
734 vf->tx_rate / 50, 0, NULL); 782 max_tx_rate, 0, NULL);
735 if (ret) 783 if (ret)
736 dev_err(&pf->pdev->dev, "Unable to set tx rate, VF %d, error code %d.\n", 784 dev_err(&pf->pdev->dev, "Unable to set tx rate, VF %d, error code %d.\n",
737 vf->vf_id, ret); 785 vf->vf_id, ret);
@@ -742,6 +790,92 @@ error_alloc_vsi_res:
742} 790}
743 791
744/** 792/**
793 * i40e_map_pf_queues_to_vsi
794 * @vf: pointer to the VF info
795 *
796 * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
797 * function takes care of the first part, VSILAN_QTABLE, mapping PF queues to the VSI.
798 **/
799static void i40e_map_pf_queues_to_vsi(struct i40e_vf *vf)
800{
801 struct i40e_pf *pf = vf->pf;
802 struct i40e_hw *hw = &pf->hw;
803 u32 reg, num_tc = 1; /* VF has at least one traffic class */
804 u16 vsi_id, qps;
805 int i, j;
806
807 if (vf->adq_enabled)
808 num_tc = vf->num_tc;
809
810 for (i = 0; i < num_tc; i++) {
811 if (vf->adq_enabled) {
812 qps = vf->ch[i].num_qps;
813 vsi_id = vf->ch[i].vsi_id;
814 } else {
815 qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
816 vsi_id = vf->lan_vsi_id;
817 }
818
819 for (j = 0; j < 7; j++) {
820 if (j * 2 >= qps) {
821 /* end of list */
822 reg = 0x07FF07FF;
823 } else {
824 u16 qid = i40e_vc_get_pf_queue_id(vf,
825 vsi_id,
826 j * 2);
827 reg = qid;
828 qid = i40e_vc_get_pf_queue_id(vf, vsi_id,
829 (j * 2) + 1);
830 reg |= qid << 16;
831 }
832 i40e_write_rx_ctl(hw,
833 I40E_VSILAN_QTABLE(j, vsi_id),
834 reg);
835 }
836 }
837}
838
839/**
840 * i40e_map_pf_to_vf_queues
841 * @vf: pointer to the VF info
842 *
843 * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
844 * function takes care of the second part, VPLAN_QTABLE, and completes the VF mappings.
845 **/
846static void i40e_map_pf_to_vf_queues(struct i40e_vf *vf)
847{
848 struct i40e_pf *pf = vf->pf;
849 struct i40e_hw *hw = &pf->hw;
850 u32 reg, total_qps = 0;
851 u32 qps, num_tc = 1; /* VF has at least one traffic class */
852 u16 vsi_id, qid;
853 int i, j;
854
855 if (vf->adq_enabled)
856 num_tc = vf->num_tc;
857
858 for (i = 0; i < num_tc; i++) {
859 if (vf->adq_enabled) {
860 qps = vf->ch[i].num_qps;
861 vsi_id = vf->ch[i].vsi_id;
862 } else {
863 qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
864 vsi_id = vf->lan_vsi_id;
865 }
866
867 for (j = 0; j < qps; j++) {
868 qid = i40e_vc_get_pf_queue_id(vf, vsi_id, j);
869
870 reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
871 wr32(hw, I40E_VPLAN_QTABLE(total_qps, vf->vf_id),
872 reg);
873 total_qps++;
874 }
875 }
876}
877
878/**
745 * i40e_enable_vf_mappings 879 * i40e_enable_vf_mappings
746 * @vf: pointer to the VF info 880 * @vf: pointer to the VF info
747 * 881 *
@@ -751,8 +885,7 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf)
751{ 885{
752 struct i40e_pf *pf = vf->pf; 886 struct i40e_pf *pf = vf->pf;
753 struct i40e_hw *hw = &pf->hw; 887 struct i40e_hw *hw = &pf->hw;
754 u32 reg, total_queue_pairs = 0; 888 u32 reg;
755 int j;
756 889
757 /* Tell the hardware we're using noncontiguous mapping. HW requires 890 /* Tell the hardware we're using noncontiguous mapping. HW requires
758 * that VF queues be mapped using this method, even when they are 891 * that VF queues be mapped using this method, even when they are
@@ -765,30 +898,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf)
765 reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK; 898 reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK;
766 wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), reg); 899 wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), reg);
767 900
768 /* map PF queues to VF queues */ 901 i40e_map_pf_to_vf_queues(vf);
769 for (j = 0; j < pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; j++) { 902 i40e_map_pf_queues_to_vsi(vf);
770 u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, j);
771
772 reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
773 wr32(hw, I40E_VPLAN_QTABLE(total_queue_pairs, vf->vf_id), reg);
774 total_queue_pairs++;
775 }
776
777 /* map PF queues to VSI */
778 for (j = 0; j < 7; j++) {
779 if (j * 2 >= pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs) {
780 reg = 0x07FF07FF; /* unused */
781 } else {
782 u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id,
783 j * 2);
784 reg = qid;
785 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id,
786 (j * 2) + 1);
787 reg |= qid << 16;
788 }
789 i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id),
790 reg);
791 }
792 903
793 i40e_flush(hw); 904 i40e_flush(hw);
794} 905}
@@ -824,7 +935,7 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
824 struct i40e_pf *pf = vf->pf; 935 struct i40e_pf *pf = vf->pf;
825 struct i40e_hw *hw = &pf->hw; 936 struct i40e_hw *hw = &pf->hw;
826 u32 reg_idx, reg; 937 u32 reg_idx, reg;
827 int i, msix_vf; 938 int i, j, msix_vf;
828 939
829 /* Start by disabling VF's configuration API to prevent the OS from 940 /* Start by disabling VF's configuration API to prevent the OS from
830 * accessing the VF's VSI after it's freed / invalidated. 941 * accessing the VF's VSI after it's freed / invalidated.
@@ -846,6 +957,20 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
846 vf->lan_vsi_id = 0; 957 vf->lan_vsi_id = 0;
847 vf->num_mac = 0; 958 vf->num_mac = 0;
848 } 959 }
960
961 /* do the accounting and remove additional ADq VSI's */
962 if (vf->adq_enabled && vf->ch[0].vsi_idx) {
963 for (j = 0; j < vf->num_tc; j++) {
964 /* At this point VSI0 is already released, so don't
965 * release it again; only clear its bookkeeping in the
966 * channel structure
967 */
968 if (j)
969 i40e_vsi_release(pf->vsi[vf->ch[j].vsi_idx]);
970 vf->ch[j].vsi_idx = 0;
971 vf->ch[j].vsi_id = 0;
972 }
973 }
849 msix_vf = pf->hw.func_caps.num_msix_vectors_vf; 974 msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
850 975
851 /* disable interrupts so the VF starts in a known state */ 976 /* disable interrupts so the VF starts in a known state */
@@ -891,7 +1016,7 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf)
891{ 1016{
892 struct i40e_pf *pf = vf->pf; 1017 struct i40e_pf *pf = vf->pf;
893 int total_queue_pairs = 0; 1018 int total_queue_pairs = 0;
894 int ret; 1019 int ret, idx;
895 1020
896 if (vf->num_req_queues && 1021 if (vf->num_req_queues &&
897 vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF) 1022 vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF)
@@ -900,11 +1025,30 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf)
900 pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF; 1025 pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
901 1026
902 /* allocate hw vsi context & associated resources */ 1027 /* allocate hw vsi context & associated resources */
903 ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV); 1028 ret = i40e_alloc_vsi_res(vf, 0);
904 if (ret) 1029 if (ret)
905 goto error_alloc; 1030 goto error_alloc;
906 total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; 1031 total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
907 1032
1033 /* allocate additional VSIs based on tc information for ADq */
1034 if (vf->adq_enabled) {
1035 if (pf->queues_left >=
1036 (I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF)) {
1037 /* TC 0 always belongs to VF VSI */
1038 for (idx = 1; idx < vf->num_tc; idx++) {
1039 ret = i40e_alloc_vsi_res(vf, idx);
1040 if (ret)
1041 goto error_alloc;
1042 }
1043 /* send correct number of queues */
1044 total_queue_pairs = I40E_MAX_VF_QUEUES;
1045 } else {
1046 dev_info(&pf->pdev->dev, "VF %d: Not enough queues to allocate, disabling ADq\n",
1047 vf->vf_id);
1048 vf->adq_enabled = false;
1049 }
1050 }
1051
908 /* We account for each VF to get a default number of queue pairs. If 1052 /* We account for each VF to get a default number of queue pairs. If
909 * the VF has now requested more, we need to account for that to make 1053 * the VF has now requested more, we need to account for that to make
910 * certain we never request more queues than we actually have left in 1054 * certain we never request more queues than we actually have left in
@@ -1537,6 +1681,27 @@ static int i40e_vc_get_version_msg(struct i40e_vf *vf, u8 *msg)
1537} 1681}
1538 1682
1539/** 1683/**
1684 * i40e_del_qch - delete all the additional VSIs created as a part of ADq
1685 * @vf: pointer to VF structure
1686 **/
1687static void i40e_del_qch(struct i40e_vf *vf)
1688{
1689 struct i40e_pf *pf = vf->pf;
1690 int i;
1691
1692 /* The first element in the array belongs to the primary VF VSI, which we
1693 * shouldn't delete. We should, however, delete the rest of the VSIs created
1694 */
1695 for (i = 1; i < vf->num_tc; i++) {
1696 if (vf->ch[i].vsi_idx) {
1697 i40e_vsi_release(pf->vsi[vf->ch[i].vsi_idx]);
1698 vf->ch[i].vsi_idx = 0;
1699 vf->ch[i].vsi_id = 0;
1700 }
1701 }
1702}
1703
1704/**
1540 * i40e_vc_get_vf_resources_msg 1705 * i40e_vc_get_vf_resources_msg
1541 * @vf: pointer to the VF info 1706 * @vf: pointer to the VF info
1542 * @msg: pointer to the msg buffer 1707 * @msg: pointer to the msg buffer
@@ -1631,6 +1796,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
1631 if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES) 1796 if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
1632 vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; 1797 vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
1633 1798
1799 if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)
1800 vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADQ;
1801
1634 vfres->num_vsis = num_vsis; 1802 vfres->num_vsis = num_vsis;
1635 vfres->num_queue_pairs = vf->num_queue_pairs; 1803 vfres->num_queue_pairs = vf->num_queue_pairs;
1636 vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; 1804 vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf;
@@ -1855,27 +2023,37 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
1855 (struct virtchnl_vsi_queue_config_info *)msg; 2023 (struct virtchnl_vsi_queue_config_info *)msg;
1856 struct virtchnl_queue_pair_info *qpi; 2024 struct virtchnl_queue_pair_info *qpi;
1857 struct i40e_pf *pf = vf->pf; 2025 struct i40e_pf *pf = vf->pf;
1858 u16 vsi_id, vsi_queue_id; 2026 u16 vsi_id, vsi_queue_id = 0;
1859 i40e_status aq_ret = 0; 2027 i40e_status aq_ret = 0;
1860 int i; 2028 int i, j = 0, idx = 0;
2029
2030 vsi_id = qci->vsi_id;
1861 2031
1862 if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { 2032 if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
1863 aq_ret = I40E_ERR_PARAM; 2033 aq_ret = I40E_ERR_PARAM;
1864 goto error_param; 2034 goto error_param;
1865 } 2035 }
1866 2036
1867 vsi_id = qci->vsi_id;
1868 if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) { 2037 if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
1869 aq_ret = I40E_ERR_PARAM; 2038 aq_ret = I40E_ERR_PARAM;
1870 goto error_param; 2039 goto error_param;
1871 } 2040 }
2041
1872 for (i = 0; i < qci->num_queue_pairs; i++) { 2042 for (i = 0; i < qci->num_queue_pairs; i++) {
1873 qpi = &qci->qpair[i]; 2043 qpi = &qci->qpair[i];
1874 vsi_queue_id = qpi->txq.queue_id; 2044
1875 if ((qpi->txq.vsi_id != vsi_id) || 2045 if (!vf->adq_enabled) {
1876 (qpi->rxq.vsi_id != vsi_id) || 2046 vsi_queue_id = qpi->txq.queue_id;
1877 (qpi->rxq.queue_id != vsi_queue_id) || 2047
1878 !i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) { 2048 if (qpi->txq.vsi_id != qci->vsi_id ||
2049 qpi->rxq.vsi_id != qci->vsi_id ||
2050 qpi->rxq.queue_id != vsi_queue_id) {
2051 aq_ret = I40E_ERR_PARAM;
2052 goto error_param;
2053 }
2054 }
2055
2056 if (!i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) {
1879 aq_ret = I40E_ERR_PARAM; 2057 aq_ret = I40E_ERR_PARAM;
1880 goto error_param; 2058 goto error_param;
1881 } 2059 }
@@ -1887,9 +2065,33 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
1887 aq_ret = I40E_ERR_PARAM; 2065 aq_ret = I40E_ERR_PARAM;
1888 goto error_param; 2066 goto error_param;
1889 } 2067 }
2068
2069 /* For ADq there can be up to 4 VSIs with a max of 4 queues each.
2070 * The VF does not know about these additional VSIs; all
2071 * it cares about is its own queues. The PF configures these queues
2072 * in the appropriate VSIs based on the TC mapping.
2073 */
2074 if (vf->adq_enabled) {
2075 if (j == (vf->ch[idx].num_qps - 1)) {
2076 idx++;
2077 j = 0; /* resetting the queue count */
2078 vsi_queue_id = 0;
2079 } else {
2080 j++;
2081 vsi_queue_id++;
2082 }
2083 vsi_id = vf->ch[idx].vsi_id;
2084 }
1890 } 2085 }
1891 /* set vsi num_queue_pairs in use to num configured by VF */ 2086 /* set vsi num_queue_pairs in use to num configured by VF */
1892 pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = qci->num_queue_pairs; 2087 if (!vf->adq_enabled) {
2088 pf->vsi[vf->lan_vsi_idx]->num_queue_pairs =
2089 qci->num_queue_pairs;
2090 } else {
2091 for (i = 0; i < vf->num_tc; i++)
2092 pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs =
2093 vf->ch[i].num_qps;
2094 }
1893 2095
1894error_param: 2096error_param:
1895 /* send the response to the VF */ 2097 /* send the response to the VF */
@@ -1898,6 +2100,33 @@ error_param:
1898} 2100}
1899 2101
1900/** 2102/**
2103 * i40e_validate_queue_map
2104 * @vsi_id: VSI id
2105 * @queuemap: Tx or Rx queue map
2106 *
2107 * check if Tx or Rx queue map is valid
2108 **/
2109static int i40e_validate_queue_map(struct i40e_vf *vf, u16 vsi_id,
2110 unsigned long queuemap)
2111{
2112 u16 vsi_queue_id, queue_id;
2113
2114 for_each_set_bit(vsi_queue_id, &queuemap, I40E_MAX_VSI_QP) {
2115 if (vf->adq_enabled) {
2116 vsi_id = vf->ch[vsi_queue_id / I40E_MAX_VF_VSI].vsi_id;
2117 queue_id = (vsi_queue_id % I40E_DEFAULT_QUEUES_PER_VF);
2118 } else {
2119 queue_id = vsi_queue_id;
2120 }
2121
2122 if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id))
2123 return -EINVAL;
2124 }
2125
2126 return 0;
2127}
2128
2129/**
1901 * i40e_vc_config_irq_map_msg 2130 * i40e_vc_config_irq_map_msg
1902 * @vf: pointer to the VF info 2131 * @vf: pointer to the VF info
1903 * @msg: pointer to the msg buffer 2132 * @msg: pointer to the msg buffer
@@ -1911,9 +2140,8 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
1911 struct virtchnl_irq_map_info *irqmap_info = 2140 struct virtchnl_irq_map_info *irqmap_info =
1912 (struct virtchnl_irq_map_info *)msg; 2141 (struct virtchnl_irq_map_info *)msg;
1913 struct virtchnl_vector_map *map; 2142 struct virtchnl_vector_map *map;
1914 u16 vsi_id, vsi_queue_id, vector_id; 2143 u16 vsi_id, vector_id;
1915 i40e_status aq_ret = 0; 2144 i40e_status aq_ret = 0;
1916 unsigned long tempmap;
1917 int i; 2145 int i;
1918 2146
1919 if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { 2147 if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
@@ -1923,7 +2151,6 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
1923 2151
1924 for (i = 0; i < irqmap_info->num_vectors; i++) { 2152 for (i = 0; i < irqmap_info->num_vectors; i++) {
1925 map = &irqmap_info->vecmap[i]; 2153 map = &irqmap_info->vecmap[i];
1926
1927 vector_id = map->vector_id; 2154 vector_id = map->vector_id;
1928 vsi_id = map->vsi_id; 2155 vsi_id = map->vsi_id;
1929 /* validate msg params */ 2156 /* validate msg params */
@@ -1933,23 +2160,14 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
1933 goto error_param; 2160 goto error_param;
1934 } 2161 }
1935 2162
1936 /* lookout for the invalid queue index */ 2163 if (i40e_validate_queue_map(vf, vsi_id, map->rxq_map)) {
1937 tempmap = map->rxq_map; 2164 aq_ret = I40E_ERR_PARAM;
1938 for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) { 2165 goto error_param;
1939 if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
1940 vsi_queue_id)) {
1941 aq_ret = I40E_ERR_PARAM;
1942 goto error_param;
1943 }
1944 } 2166 }
1945 2167
1946 tempmap = map->txq_map; 2168 if (i40e_validate_queue_map(vf, vsi_id, map->txq_map)) {
1947 for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) { 2169 aq_ret = I40E_ERR_PARAM;
1948 if (!i40e_vc_isvalid_queue_id(vf, vsi_id, 2170 goto error_param;
1949 vsi_queue_id)) {
1950 aq_ret = I40E_ERR_PARAM;
1951 goto error_param;
1952 }
1953 } 2171 }
1954 2172
1955 i40e_config_irq_link_list(vf, vsi_id, map); 2173 i40e_config_irq_link_list(vf, vsi_id, map);
@@ -1975,6 +2193,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
1975 struct i40e_pf *pf = vf->pf; 2193 struct i40e_pf *pf = vf->pf;
1976 u16 vsi_id = vqs->vsi_id; 2194 u16 vsi_id = vqs->vsi_id;
1977 i40e_status aq_ret = 0; 2195 i40e_status aq_ret = 0;
2196 int i;
1978 2197
1979 if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { 2198 if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
1980 aq_ret = I40E_ERR_PARAM; 2199 aq_ret = I40E_ERR_PARAM;
@@ -1993,6 +2212,16 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
1993 2212
1994 if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx])) 2213 if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx]))
1995 aq_ret = I40E_ERR_TIMEOUT; 2214 aq_ret = I40E_ERR_TIMEOUT;
2215
2216 /* need to start the rings for additional ADq VSI's as well */
2217 if (vf->adq_enabled) {
2218 /* zero belongs to LAN VSI */
2219 for (i = 1; i < vf->num_tc; i++) {
2220 if (i40e_vsi_start_rings(pf->vsi[vf->ch[i].vsi_idx]))
2221 aq_ret = I40E_ERR_TIMEOUT;
2222 }
2223 }
2224
1996error_param: 2225error_param:
1997 /* send the response to the VF */ 2226 /* send the response to the VF */
1998 return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, 2227 return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES,
@@ -2688,6 +2917,618 @@ err:
2688} 2917}
2689 2918
2690/** 2919/**
2920 * i40e_validate_cloud_filter
2921 * @vf: pointer to the VF info
2922 * @tc_filter: TC filter being validated
2923 *
2924 * This function validates cloud filter programmed as TC filter for ADq
2925 **/
2926static int i40e_validate_cloud_filter(struct i40e_vf *vf,
2927 struct virtchnl_filter *tc_filter)
2928{
2929 struct virtchnl_l4_spec mask = tc_filter->mask.tcp_spec;
2930 struct virtchnl_l4_spec data = tc_filter->data.tcp_spec;
2931 struct i40e_pf *pf = vf->pf;
2932 struct i40e_vsi *vsi = NULL;
2933 struct i40e_mac_filter *f;
2934 struct hlist_node *h;
2935 bool found = false;
2936 int bkt;
2937
2938 if (!tc_filter->action) {
2939 dev_info(&pf->pdev->dev,
2940 "VF %d: Currently ADq doesn't support Drop Action\n",
2941 vf->vf_id);
2942 goto err;
2943 }
2944
2945 /* action_meta is TC number here to which the filter is applied */
2946 if (!tc_filter->action_meta ||
2947 tc_filter->action_meta > I40E_MAX_VF_VSI) {
2948 dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n",
2949 vf->vf_id, tc_filter->action_meta);
2950 goto err;
2951 }
2952
2953 /* Check filter if it's programmed for advanced mode or basic mode.
2954 * There are two ADq modes (for VF only),
2955 * 1. Basic mode: intended to allow as many filter options as possible
2956 * to be added to a VF in Non-trusted mode. Main goal is
2957 * to add filters to its own MAC and VLAN id.
2958 * 2. Advanced mode: is for allowing filters to be applied other than
2959 * its own MAC or VLAN. This mode requires the VF to be
2960 * Trusted.
2961 */
2962 if (mask.dst_mac[0] && !mask.dst_ip[0]) {
2963 vsi = pf->vsi[vf->lan_vsi_idx];
2964 f = i40e_find_mac(vsi, data.dst_mac);
2965
2966 if (!f) {
2967 dev_info(&pf->pdev->dev,
2968 "Destination MAC %pM doesn't belong to VF %d\n",
2969 data.dst_mac, vf->vf_id);
2970 goto err;
2971 }
2972
2973 if (mask.vlan_id) {
2974 hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f,
2975 hlist) {
2976 if (f->vlan == ntohs(data.vlan_id)) {
2977 found = true;
2978 break;
2979 }
2980 }
2981 if (!found) {
2982 dev_info(&pf->pdev->dev,
2983 "VF %d doesn't have any VLAN id %u\n",
2984 vf->vf_id, ntohs(data.vlan_id));
2985 goto err;
2986 }
2987 }
2988 } else {
2989 /* Check if VF is trusted */
2990 if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
2991 dev_err(&pf->pdev->dev,
2992 "VF %d not trusted, make VF trusted to add advanced mode ADq cloud filters\n",
2993 vf->vf_id);
2994 return I40E_ERR_CONFIG;
2995 }
2996 }
2997
2998 if (mask.dst_mac[0] & data.dst_mac[0]) {
2999 if (is_broadcast_ether_addr(data.dst_mac) ||
3000 is_zero_ether_addr(data.dst_mac)) {
3001 dev_info(&pf->pdev->dev, "VF %d: Invalid Dest MAC addr %pM\n",
3002 vf->vf_id, data.dst_mac);
3003 goto err;
3004 }
3005 }
3006
3007 if (mask.src_mac[0] & data.src_mac[0]) {
3008 if (is_broadcast_ether_addr(data.src_mac) ||
3009 is_zero_ether_addr(data.src_mac)) {
3010 dev_info(&pf->pdev->dev, "VF %d: Invalid Source MAC addr %pM\n",
3011 vf->vf_id, data.src_mac);
3012 goto err;
3013 }
3014 }
3015
3016 if (mask.dst_port & data.dst_port) {
3017 if (!data.dst_port || be16_to_cpu(data.dst_port) > 0xFFFF) {
3018 dev_info(&pf->pdev->dev, "VF %d: Invalid Dest port\n",
3019 vf->vf_id);
3020 goto err;
3021 }
3022 }
3023
3024 if (mask.src_port & data.src_port) {
3025 if (!data.src_port || be16_to_cpu(data.src_port) > 0xFFFF) {
3026 dev_info(&pf->pdev->dev, "VF %d: Invalid Source port\n",
3027 vf->vf_id);
3028 goto err;
3029 }
3030 }
3031
3032 if (tc_filter->flow_type != VIRTCHNL_TCP_V6_FLOW &&
3033 tc_filter->flow_type != VIRTCHNL_TCP_V4_FLOW) {
3034 dev_info(&pf->pdev->dev, "VF %d: Invalid Flow type\n",
3035 vf->vf_id);
3036 goto err;
3037 }
3038
3039 if (mask.vlan_id & data.vlan_id) {
3040 if (ntohs(data.vlan_id) > I40E_MAX_VLANID) {
3041 dev_info(&pf->pdev->dev, "VF %d: invalid VLAN ID\n",
3042 vf->vf_id);
3043 goto err;
3044 }
3045 }
3046
3047 return I40E_SUCCESS;
3048err:
3049 return I40E_ERR_CONFIG;
3050}
3051
3052/**
3053 * i40e_find_vsi_from_seid - searches for the vsi with the given seid
3054 * @vf: pointer to the VF info
3055 * @seid: seid of the VSI being searched for
3056 **/
3057static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid)
3058{
3059 struct i40e_pf *pf = vf->pf;
3060 struct i40e_vsi *vsi = NULL;
3061 int i;
3062
3063 for (i = 0; i < vf->num_tc; i++) {
3064 vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id);
3065 if (vsi->seid == seid)
3066 return vsi;
3067 }
3068 return NULL;
3069}
3070
3071/**
3072 * i40e_del_all_cloud_filters
3073 * @vf: pointer to the VF info
3074 *
3075 * This function deletes all cloud filters
3076 **/
3077static void i40e_del_all_cloud_filters(struct i40e_vf *vf)
3078{
3079 struct i40e_cloud_filter *cfilter = NULL;
3080 struct i40e_pf *pf = vf->pf;
3081 struct i40e_vsi *vsi = NULL;
3082 struct hlist_node *node;
3083 int ret;
3084
3085 hlist_for_each_entry_safe(cfilter, node,
3086 &vf->cloud_filter_list, cloud_node) {
3087 vsi = i40e_find_vsi_from_seid(vf, cfilter->seid);
3088
3089 if (!vsi) {
3090 dev_err(&pf->pdev->dev, "VF %d: no VSI found for matching %u seid, can't delete cloud filter\n",
3091 vf->vf_id, cfilter->seid);
3092 continue;
3093 }
3094
3095 if (cfilter->dst_port)
3096 ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
3097 false);
3098 else
3099 ret = i40e_add_del_cloud_filter(vsi, cfilter, false);
3100 if (ret)
3101 dev_err(&pf->pdev->dev,
3102 "VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
3103 vf->vf_id, i40e_stat_str(&pf->hw, ret),
3104 i40e_aq_str(&pf->hw,
3105 pf->hw.aq.asq_last_status));
3106
3107 hlist_del(&cfilter->cloud_node);
3108 kfree(cfilter);
3109 vf->num_cloud_filters--;
3110 }
3111}
3112
3113/**
3114 * i40e_vc_del_cloud_filter
3115 * @vf: pointer to the VF info
3116 * @msg: pointer to the msg buffer
3117 *
3118 * This function deletes a cloud filter programmed as TC filter for ADq
3119 **/
3120static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
3121{
3122 struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
3123 struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
3124 struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
3125 struct i40e_cloud_filter cfilter, *cf = NULL;
3126 struct i40e_pf *pf = vf->pf;
3127 struct i40e_vsi *vsi = NULL;
3128 struct hlist_node *node;
3129 i40e_status aq_ret = 0;
3130 int i, ret;
3131
3132 if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
3133 aq_ret = I40E_ERR_PARAM;
3134 goto err;
3135 }
3136
3137 if (!vf->adq_enabled) {
3138 dev_info(&pf->pdev->dev,
3139 "VF %d: ADq not enabled, can't apply cloud filter\n",
3140 vf->vf_id);
3141 aq_ret = I40E_ERR_PARAM;
3142 goto err;
3143 }
3144
3145 if (i40e_validate_cloud_filter(vf, vcf)) {
3146 dev_info(&pf->pdev->dev,
3147 "VF %d: Invalid input, can't apply cloud filter\n",
3148 vf->vf_id);
3149 aq_ret = I40E_ERR_PARAM;
3150 goto err;
3151 }
3152
3153 memset(&cfilter, 0, sizeof(cfilter));
3154 /* parse destination mac address */
3155 for (i = 0; i < ETH_ALEN; i++)
3156 cfilter.dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
3157
3158 /* parse source mac address */
3159 for (i = 0; i < ETH_ALEN; i++)
3160 cfilter.src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
3161
3162 cfilter.vlan_id = mask.vlan_id & tcf.vlan_id;
3163 cfilter.dst_port = mask.dst_port & tcf.dst_port;
3164 cfilter.src_port = mask.src_port & tcf.src_port;
3165
3166 switch (vcf->flow_type) {
3167 case VIRTCHNL_TCP_V4_FLOW:
3168 cfilter.n_proto = ETH_P_IP;
3169 if (mask.dst_ip[0] & tcf.dst_ip[0])
3170 memcpy(&cfilter.ip.v4.dst_ip, tcf.dst_ip,
3171 ARRAY_SIZE(tcf.dst_ip));
3172 else if (mask.src_ip[0] & tcf.src_ip[0])
3173 memcpy(&cfilter.ip.v4.src_ip, tcf.src_ip,
3174 ARRAY_SIZE(tcf.dst_ip));
3175 break;
3176 case VIRTCHNL_TCP_V6_FLOW:
3177 cfilter.n_proto = ETH_P_IPV6;
3178 if (mask.dst_ip[3] & tcf.dst_ip[3])
3179 memcpy(&cfilter.ip.v6.dst_ip6, tcf.dst_ip,
3180 sizeof(cfilter.ip.v6.dst_ip6));
3181 if (mask.src_ip[3] & tcf.src_ip[3])
3182 memcpy(&cfilter.ip.v6.src_ip6, tcf.src_ip,
3183 sizeof(cfilter.ip.v6.src_ip6));
3184 break;
3185 default:
3186 /* TC filter can be configured based on different combinations
3187 * and in this case IP is not a part of filter config
3188 */
3189 dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
3190 vf->vf_id);
3191 }
3192
3193 /* get the vsi to which the tc belongs to */
3194 vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
3195 cfilter.seid = vsi->seid;
3196 cfilter.flags = vcf->field_flags;
3197
3198 /* Deleting TC filter */
3199 if (tcf.dst_port)
3200 ret = i40e_add_del_cloud_filter_big_buf(vsi, &cfilter, false);
3201 else
3202 ret = i40e_add_del_cloud_filter(vsi, &cfilter, false);
3203 if (ret) {
3204 dev_err(&pf->pdev->dev,
3205 "VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
3206 vf->vf_id, i40e_stat_str(&pf->hw, ret),
3207 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
3208 goto err;
3209 }
3210
3211 hlist_for_each_entry_safe(cf, node,
3212 &vf->cloud_filter_list, cloud_node) {
3213 if (cf->seid != cfilter.seid)
3214 continue;
3215 if (mask.dst_port)
3216 if (cfilter.dst_port != cf->dst_port)
3217 continue;
3218 if (mask.dst_mac[0])
3219 if (!ether_addr_equal(cf->src_mac, cfilter.src_mac))
3220 continue;
3221 /* for ipv4 data to be valid, only first byte of mask is set */
3222 if (cfilter.n_proto == ETH_P_IP && mask.dst_ip[0])
3223 if (memcmp(&cfilter.ip.v4.dst_ip, &cf->ip.v4.dst_ip,
3224 ARRAY_SIZE(tcf.dst_ip)))
3225 continue;
3226 /* for ipv6, mask is set for all sixteen bytes (4 words) */
3227 if (cfilter.n_proto == ETH_P_IPV6 && mask.dst_ip[3])
3228 if (memcmp(&cfilter.ip.v6.dst_ip6, &cf->ip.v6.dst_ip6,
3229 sizeof(cfilter.ip.v6.src_ip6)))
3230 continue;
3231 if (mask.vlan_id)
3232 if (cfilter.vlan_id != cf->vlan_id)
3233 continue;
3234
3235 hlist_del(&cf->cloud_node);
3236 kfree(cf);
3237 vf->num_cloud_filters--;
3238 }
3239
3240err:
3241 return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_CLOUD_FILTER,
3242 aq_ret);
3243}
3244
3245/**
3246 * i40e_vc_add_cloud_filter
3247 * @vf: pointer to the VF info
3248 * @msg: pointer to the msg buffer
3249 *
3250 * This function adds a cloud filter programmed as TC filter for ADq
3251 **/
3252static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
3253{
3254 struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
3255 struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
3256 struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
3257 struct i40e_cloud_filter *cfilter = NULL;
3258 struct i40e_pf *pf = vf->pf;
3259 struct i40e_vsi *vsi = NULL;
3260 i40e_status aq_ret = 0;
3261 int i, ret;
3262
3263 if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
3264 aq_ret = I40E_ERR_PARAM;
3265 goto err;
3266 }
3267
3268 if (!vf->adq_enabled) {
3269 dev_info(&pf->pdev->dev,
3270 "VF %d: ADq is not enabled, can't apply cloud filter\n",
3271 vf->vf_id);
3272 aq_ret = I40E_ERR_PARAM;
3273 goto err;
3274 }
3275
3276 if (i40e_validate_cloud_filter(vf, vcf)) {
3277 dev_info(&pf->pdev->dev,
3278 "VF %d: Invalid input(s), can't apply cloud filter\n",
3279 vf->vf_id);
3280 aq_ret = I40E_ERR_PARAM;
3281 goto err;
3282 }
3283
3284 cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL);
3285 if (!cfilter)
3286 return -ENOMEM;
3287
3288 /* parse destination mac address */
3289 for (i = 0; i < ETH_ALEN; i++)
3290 cfilter->dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
3291
3292 /* parse source mac address */
3293 for (i = 0; i < ETH_ALEN; i++)
3294 cfilter->src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
3295
3296 cfilter->vlan_id = mask.vlan_id & tcf.vlan_id;
3297 cfilter->dst_port = mask.dst_port & tcf.dst_port;
3298 cfilter->src_port = mask.src_port & tcf.src_port;
3299
3300 switch (vcf->flow_type) {
3301 case VIRTCHNL_TCP_V4_FLOW:
3302 cfilter->n_proto = ETH_P_IP;
3303 if (mask.dst_ip[0] & tcf.dst_ip[0])
3304 memcpy(&cfilter->ip.v4.dst_ip, tcf.dst_ip,
3305 ARRAY_SIZE(tcf.dst_ip));
3306 else if (mask.src_ip[0] & tcf.src_ip[0])
3307 memcpy(&cfilter->ip.v4.src_ip, tcf.src_ip,
3308 ARRAY_SIZE(tcf.dst_ip));
3309 break;
3310 case VIRTCHNL_TCP_V6_FLOW:
3311 cfilter->n_proto = ETH_P_IPV6;
3312 if (mask.dst_ip[3] & tcf.dst_ip[3])
3313 memcpy(&cfilter->ip.v6.dst_ip6, tcf.dst_ip,
3314 sizeof(cfilter->ip.v6.dst_ip6));
3315 if (mask.src_ip[3] & tcf.src_ip[3])
3316 memcpy(&cfilter->ip.v6.src_ip6, tcf.src_ip,
3317 sizeof(cfilter->ip.v6.src_ip6));
3318 break;
3319 default:
3320 /* TC filter can be configured based on different combinations
3321 * and in this case IP is not a part of filter config
3322 */
3323 dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
3324 vf->vf_id);
3325 }
3326
3327 /* get the VSI to which the TC belongs to */
3328 vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
3329 cfilter->seid = vsi->seid;
3330 cfilter->flags = vcf->field_flags;
3331
3332 /* Adding cloud filter programmed as TC filter */
3333 if (tcf.dst_port)
3334 ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true);
3335 else
3336 ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
3337 if (ret) {
3338 dev_err(&pf->pdev->dev,
3339 "VF %d: Failed to add cloud filter, err %s aq_err %s\n",
3340 vf->vf_id, i40e_stat_str(&pf->hw, ret),
3341 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
3342 goto err;
3343 }
3344
3345 INIT_HLIST_NODE(&cfilter->cloud_node);
3346 hlist_add_head(&cfilter->cloud_node, &vf->cloud_filter_list);
3347 vf->num_cloud_filters++;
3348err:
3349 return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_CLOUD_FILTER,
3350 aq_ret);
3351}
3352
3353/**
3354 * i40e_vc_add_qch_msg: Add queue channel and enable ADq
3355 * @vf: pointer to the VF info
3356 * @msg: pointer to the msg buffer
3357 **/
3358static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
3359{
3360 struct virtchnl_tc_info *tci =
3361 (struct virtchnl_tc_info *)msg;
3362 struct i40e_pf *pf = vf->pf;
3363 struct i40e_link_status *ls = &pf->hw.phy.link_info;
3364 int i, adq_request_qps = 0, speed = 0;
3365 i40e_status aq_ret = 0;
3366
3367 if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
3368 aq_ret = I40E_ERR_PARAM;
3369 goto err;
3370 }
3371
3372 /* ADq cannot be applied if spoof check is ON */
3373 if (vf->spoofchk) {
3374 dev_err(&pf->pdev->dev,
3375 "Spoof check is ON, turn it OFF to enable ADq\n");
3376 aq_ret = I40E_ERR_PARAM;
3377 goto err;
3378 }
3379
3380 if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)) {
3381 dev_err(&pf->pdev->dev,
3382 "VF %d attempting to enable ADq, but hasn't properly negotiated that capability\n",
3383 vf->vf_id);
3384 aq_ret = I40E_ERR_PARAM;
3385 goto err;
3386 }
3387
3388 /* max number of traffic classes for VF currently capped at 4 */
3389 if (!tci->num_tc || tci->num_tc > I40E_MAX_VF_VSI) {
3390 dev_err(&pf->pdev->dev,
3391 "VF %d trying to set %u TCs, valid range 1-4 TCs per VF\n",
3392 vf->vf_id, tci->num_tc);
3393 aq_ret = I40E_ERR_PARAM;
3394 goto err;
3395 }
3396
3397 /* validate queues for each TC */
3398 for (i = 0; i < tci->num_tc; i++)
3399 if (!tci->list[i].count ||
3400 tci->list[i].count > I40E_DEFAULT_QUEUES_PER_VF) {
3401 dev_err(&pf->pdev->dev,
3402 "VF %d: TC %d trying to set %u queues, valid range 1-4 queues per TC\n",
3403 vf->vf_id, i, tci->list[i].count);
3404 aq_ret = I40E_ERR_PARAM;
3405 goto err;
3406 }
3407
3408 /* need Max VF queues but already have default number of queues */
3409 adq_request_qps = I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF;
3410
3411 if (pf->queues_left < adq_request_qps) {
3412 dev_err(&pf->pdev->dev,
3413 "No queues left to allocate to VF %d\n",
3414 vf->vf_id);
3415 aq_ret = I40E_ERR_PARAM;
3416 goto err;
3417 } else {
3418 /* we need to allocate max VF queues to enable ADq so as to
3419 * make sure ADq enabled VF always gets back queues when it
3420 * goes through a reset.
3421 */
3422 vf->num_queue_pairs = I40E_MAX_VF_QUEUES;
3423 }
3424
3425 /* get link speed in Mbps to validate the rate limit */
3426 switch (ls->link_speed) {
3427 case VIRTCHNL_LINK_SPEED_100MB:
3428 speed = SPEED_100;
3429 break;
3430 case VIRTCHNL_LINK_SPEED_1GB:
3431 speed = SPEED_1000;
3432 break;
3433 case VIRTCHNL_LINK_SPEED_10GB:
3434 speed = SPEED_10000;
3435 break;
3436 case VIRTCHNL_LINK_SPEED_20GB:
3437 speed = SPEED_20000;
3438 break;
3439 case VIRTCHNL_LINK_SPEED_25GB:
3440 speed = SPEED_25000;
3441 break;
3442 case VIRTCHNL_LINK_SPEED_40GB:
3443 speed = SPEED_40000;
3444 break;
3445 default:
3446 dev_err(&pf->pdev->dev,
3447 "Cannot detect link speed\n");
3448 aq_ret = I40E_ERR_PARAM;
3449 goto err;
3450 }
3451
3452 /* parse data from the queue channel info */
3453 vf->num_tc = tci->num_tc;
3454 for (i = 0; i < vf->num_tc; i++) {
3455 if (tci->list[i].max_tx_rate) {
3456 if (tci->list[i].max_tx_rate > speed) {
3457 dev_err(&pf->pdev->dev,
3458 "Invalid max tx rate %llu specified for VF %d.",
3459 tci->list[i].max_tx_rate,
3460 vf->vf_id);
3461 aq_ret = I40E_ERR_PARAM;
3462 goto err;
3463 } else {
3464 vf->ch[i].max_tx_rate =
3465 tci->list[i].max_tx_rate;
3466 }
3467 }
3468 vf->ch[i].num_qps = tci->list[i].count;
3469 }
3470
3471 /* set this flag only after making sure all inputs are sane */
3472 vf->adq_enabled = true;
3473 /* num_req_queues is set when the user changes the number of queues via
3474 * ethtool, and it causes issues for the default VSI (which depends on
3475 * this variable) when ADq is enabled, hence reset it.
3476 */
3477 vf->num_req_queues = 0;
3478
3479 /* reset the VF in order to allocate resources */
3480 i40e_vc_notify_vf_reset(vf);
3481 i40e_reset_vf(vf, false);
3482
3483 return I40E_SUCCESS;
3484
3485 /* send the response to the VF */
3486err:
3487 return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_CHANNELS,
3488 aq_ret);
3489}
3490
3491/**
3492 * i40e_vc_del_qch_msg
3493 * @vf: pointer to the VF info
3494 * @msg: pointer to the msg buffer
3495 **/
3496static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg)
3497{
3498 struct i40e_pf *pf = vf->pf;
3499 i40e_status aq_ret = 0;
3500
3501 if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
3502 aq_ret = I40E_ERR_PARAM;
3503 goto err;
3504 }
3505
3506 if (vf->adq_enabled) {
3507 i40e_del_all_cloud_filters(vf);
3508 i40e_del_qch(vf);
3509 vf->adq_enabled = false;
3510 vf->num_tc = 0;
3511 dev_info(&pf->pdev->dev,
3512 "Deleting Queue Channels and cloud filters for ADq on VF %d\n",
3513 vf->vf_id);
3514 } else {
3515 dev_info(&pf->pdev->dev, "VF %d trying to delete queue channels but ADq isn't enabled\n",
3516 vf->vf_id);
3517 aq_ret = I40E_ERR_PARAM;
3518 }
3519
3520 /* reset the VF in order to allocate resources */
3521 i40e_vc_notify_vf_reset(vf);
3522 i40e_reset_vf(vf, false);
3523
3524 return I40E_SUCCESS;
3525
3526err:
3527 return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_CHANNELS,
3528 aq_ret);
3529}
3530
3531/**
2691 * i40e_vc_process_vf_msg 3532 * i40e_vc_process_vf_msg
2692 * @pf: pointer to the PF structure 3533 * @pf: pointer to the PF structure
2693 * @vf_id: source VF id 3534 * @vf_id: source VF id
@@ -2816,7 +3657,18 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
2816 case VIRTCHNL_OP_REQUEST_QUEUES: 3657 case VIRTCHNL_OP_REQUEST_QUEUES:
2817 ret = i40e_vc_request_queues_msg(vf, msg, msglen); 3658 ret = i40e_vc_request_queues_msg(vf, msg, msglen);
2818 break; 3659 break;
2819 3660 case VIRTCHNL_OP_ENABLE_CHANNELS:
3661 ret = i40e_vc_add_qch_msg(vf, msg);
3662 break;
3663 case VIRTCHNL_OP_DISABLE_CHANNELS:
3664 ret = i40e_vc_del_qch_msg(vf, msg);
3665 break;
3666 case VIRTCHNL_OP_ADD_CLOUD_FILTER:
3667 ret = i40e_vc_add_cloud_filter(vf, msg);
3668 break;
3669 case VIRTCHNL_OP_DEL_CLOUD_FILTER:
3670 ret = i40e_vc_del_cloud_filter(vf, msg);
3671 break;
2820 case VIRTCHNL_OP_UNKNOWN: 3672 case VIRTCHNL_OP_UNKNOWN:
2821 default: 3673 default:
2822 dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n", 3674 dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
@@ -3382,6 +4234,16 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting)
3382 i40e_vc_disable_vf(vf); 4234 i40e_vc_disable_vf(vf);
3383 dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", 4235 dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
3384 vf_id, setting ? "" : "un"); 4236 vf_id, setting ? "" : "un");
4237
4238 if (vf->adq_enabled) {
4239 if (!vf->trusted) {
4240 dev_info(&pf->pdev->dev,
4241 "VF %u no longer Trusted, deleting all cloud filters\n",
4242 vf_id);
4243 i40e_del_all_cloud_filters(vf);
4244 }
4245 }
4246
3385out: 4247out:
3386 return ret; 4248 return ret;
3387} 4249}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 5efc4f92bb37..6852599b2379 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -69,6 +69,19 @@ enum i40e_vf_capabilities {
69 I40E_VIRTCHNL_VF_CAP_IWARP, 69 I40E_VIRTCHNL_VF_CAP_IWARP,
70}; 70};
71 71
72/* In ADq, a max of 4 VSIs can be allocated per VF, including the primary
73 * VF VSI. These variables store the indices, IDs and number of queues
74 * for each VSI, including that of the primary VF VSI. Each traffic class
75 * is termed a channel, and each channel can in turn have 4 queues, which
76 * means a max of 16 queues overall per VF.
77 */
78struct i40evf_channel {
79 u16 vsi_idx; /* index in PF struct for all channel VSIs */
80 u16 vsi_id; /* VSI ID used by firmware */
81 u16 num_qps; /* number of queue pairs requested by user */
82 u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
83};
84
72/* VF information structure */ 85/* VF information structure */
73struct i40e_vf { 86struct i40e_vf {
74 struct i40e_pf *pf; 87 struct i40e_pf *pf;
@@ -111,6 +124,13 @@ struct i40e_vf {
111 u16 num_mac; 124 u16 num_mac;
112 u16 num_vlan; 125 u16 num_vlan;
113 126
127 /* ADq related variables */
128 bool adq_enabled; /* flag to enable adq */
129 u8 num_tc;
130 struct i40evf_channel ch[I40E_MAX_VF_VSI];
131 struct hlist_head cloud_filter_list;
132 u16 num_cloud_filters;
133
114 /* RDMA Client */ 134 /* RDMA Client */
115 struct virtchnl_iwarp_qvlist_info *qvlist_info; 135 struct virtchnl_iwarp_qvlist_info *qvlist_info;
116}; 136};
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 357d6051281f..eb8f3e327f6b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -392,99 +392,241 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
392 val); 392 val);
393} 393}
394 394
395static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
396 struct i40e_ring_container *rc)
397{
398 return &q_vector->rx == rc;
399}
400
401static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
402{
403 unsigned int divisor;
404
405 switch (q_vector->adapter->link_speed) {
406 case I40E_LINK_SPEED_40GB:
407 divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
408 break;
409 case I40E_LINK_SPEED_25GB:
410 case I40E_LINK_SPEED_20GB:
411 divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
412 break;
413 default:
414 case I40E_LINK_SPEED_10GB:
415 divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
416 break;
417 case I40E_LINK_SPEED_1GB:
418 case I40E_LINK_SPEED_100MB:
419 divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
420 break;
421 }
422
423 return divisor;
424}
425
395/** 426/**
396 * i40e_set_new_dynamic_itr - Find new ITR level 427 * i40e_update_itr - update the dynamic ITR value based on statistics
428 * @q_vector: structure containing interrupt and ring information
397 * @rc: structure containing ring performance data 429 * @rc: structure containing ring performance data
398 * 430 *
399 * Returns true if ITR changed, false if not 431 * Stores a new ITR value based on packets and byte
400 * 432 * counts during the last interrupt. The advantage of per interrupt
401 * Stores a new ITR value based on packets and byte counts during 433 * computation is faster updates and more accurate ITR for the current
402 * the last interrupt. The advantage of per interrupt computation 434 * traffic pattern. Constants in this function were computed
403 * is faster updates and more accurate ITR for the current traffic 435 * based on theoretical maximum wire speed and thresholds were set based
404 * pattern. Constants in this function were computed based on 436 * on testing data as well as attempting to minimize response time
405 * theoretical maximum wire speed and thresholds were set based on
406 * testing data as well as attempting to minimize response time
407 * while increasing bulk throughput. 437 * while increasing bulk throughput.
408 **/ 438 **/
409static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) 439static void i40e_update_itr(struct i40e_q_vector *q_vector,
440 struct i40e_ring_container *rc)
410{ 441{
411 enum i40e_latency_range new_latency_range = rc->latency_range; 442 unsigned int avg_wire_size, packets, bytes, itr;
412 u32 new_itr = rc->itr; 443 unsigned long next_update = jiffies;
413 int bytes_per_usec;
414 unsigned int usecs, estimated_usecs;
415 444
416 if (rc->total_packets == 0 || !rc->itr) 445 /* If we don't have any rings just leave ourselves set for maximum
417 return false; 446 * possible latency so we take ourselves out of the equation.
447 */
448 if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
449 return;
450
451 /* For Rx we want to push the delay up and default to low latency.
452 * for Tx we want to pull the delay down and default to high latency.
453 */
454 itr = i40e_container_is_rx(q_vector, rc) ?
455 I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
456 I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
457
458 /* If we didn't update within the last 1 - 2 jiffies we can assume
459 * that either packets are coming in so slowly there hasn't been
460 * any work, or that there is so much work that NAPI is dealing
461 * with interrupt moderation and we don't need to do anything.
462 */
463 if (time_after(next_update, rc->next_update))
464 goto clear_counts;
465
466 /* If itr_countdown is set it means we programmed an ITR within
467 * the last 4 interrupt cycles. This has a side effect of us
468 * potentially firing an early interrupt. In order to work around
469 * this we need to throw out any data received for a few
470 * interrupts following the update.
471 */
472 if (q_vector->itr_countdown) {
473 itr = rc->target_itr;
474 goto clear_counts;
475 }
476
477 packets = rc->total_packets;
478 bytes = rc->total_bytes;
418 479
419 usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; 480 if (i40e_container_is_rx(q_vector, rc)) {
420 bytes_per_usec = rc->total_bytes / usecs; 481 /* If Rx there are 1 to 4 packets and bytes are less than
482 * 9000 assume insufficient data to use bulk rate limiting
483 * approach unless Tx is already in bulk rate limiting. We
484 * are likely latency driven.
485 */
486 if (packets && packets < 4 && bytes < 9000 &&
487 (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
488 itr = I40E_ITR_ADAPTIVE_LATENCY;
489 goto adjust_by_size;
490 }
491 } else if (packets < 4) {
492 /* If we have Tx and Rx ITR maxed and Tx ITR is running in
493 * bulk mode and we are receiving 4 or fewer packets just
494 * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
495 * that the Rx can relax.
496 */
497 if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
498 (q_vector->rx.target_itr & I40E_ITR_MASK) ==
499 I40E_ITR_ADAPTIVE_MAX_USECS)
500 goto clear_counts;
501 } else if (packets > 32) {
502 /* If we have processed over 32 packets in a single interrupt
503 * for Tx assume we need to switch over to "bulk" mode.
504 */
505 rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
506 }
421 507
422 /* The calculations in this algorithm depend on interrupts actually 508 /* We have no packets to actually measure against. This means
423 * firing at the ITR rate. This may not happen if the packet rate is 509 * either one of the other queues on this vector is active or
424 * really low, or if we've been napi polling. Check to make sure 510 * we are a Tx queue doing TSO with too high of an interrupt rate.
425 * that's not the case before we continue. 511 *
512 * Between 4 and 56 we can assume that our current interrupt delay
513 * is only slightly too low. As such we should increase it by a small
514 * fixed amount.
426 */ 515 */
427 estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update); 516 if (packets < 56) {
428 if (estimated_usecs > usecs) { 517 itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
429 new_latency_range = I40E_LOW_LATENCY; 518 if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
430 goto reset_latency; 519 itr &= I40E_ITR_ADAPTIVE_LATENCY;
520 itr += I40E_ITR_ADAPTIVE_MAX_USECS;
521 }
522 goto clear_counts;
523 }
524
525 if (packets <= 256) {
526 itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
527 itr &= I40E_ITR_MASK;
528
529 /* Between 56 and 112 is our "goldilocks" zone where we are
530 * working out "just right". Just report that our current
531 * ITR is good for us.
532 */
533 if (packets <= 112)
534 goto clear_counts;
535
536 /* If the packet count is above 112 we are likely looking
537 * at a slight overrun of the delay we want. Try halving
538 * our delay to see if that will cut the number of packets
539 * in half per interrupt.
540 */
541 itr /= 2;
542 itr &= I40E_ITR_MASK;
543 if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
544 itr = I40E_ITR_ADAPTIVE_MIN_USECS;
545
546 goto clear_counts;
431 } 547 }
432 548
433 /* simple throttlerate management 549 /* The paths below assume we are dealing with a bulk ITR since
434 * 0-10MB/s lowest (50000 ints/s) 550 * number of packets is greater than 256. We are just going to have
435 * 10-20MB/s low (20000 ints/s) 551 * to compute a value and try to bring the count under control,
436 * 20-1249MB/s bulk (18000 ints/s) 552 * though for smaller packet sizes there isn't much we can do as
553 * NAPI polling will likely be kicking in sooner rather than later.
554 */
555 itr = I40E_ITR_ADAPTIVE_BULK;
556
557adjust_by_size:
558 /* If packet counts are 256 or greater we can assume we have a gross
559 * overestimation of what the rate should be. Instead of trying to fine
560 * tune it just use the formula below to try and dial in an exact value
561 * given the current packet size of the frame.
562 */
563 avg_wire_size = bytes / packets;
564
565 /* The following is a crude approximation of:
566 * wmem_default / (size + overhead) = desired_pkts_per_int
567 * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
568 * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
437 * 569 *
438 * The math works out because the divisor is in 10^(-6) which 570 * Assuming wmem_default is 212992 and overhead is 640 bytes per
439 * turns the bytes/us input value into MB/s values, but 571 * packet (256 skb, 64 headroom, 320 shared info), we can reduce the
440 * make sure to use usecs, as the register values written 572 * formula down to
441 * are in 2 usec increments in the ITR registers, and make sure 573 *
442 * to use the smoothed values that the countdown timer gives us. 574 * (170 * (size + 24)) / (size + 640) = ITR
575 *
576 * We first do some math on the packet size and then finally bitshift
577 * by 8 after rounding up. We also have to account for PCIe link speed
578 * difference as ITR scales based on this.
443 */ 579 */
444 switch (new_latency_range) { 580 if (avg_wire_size <= 60) {
445 case I40E_LOWEST_LATENCY: 581 /* Start at 250k ints/sec */
446 if (bytes_per_usec > 10) 582 avg_wire_size = 4096;
447 new_latency_range = I40E_LOW_LATENCY; 583 } else if (avg_wire_size <= 380) {
448 break; 584 /* 250K ints/sec to 60K ints/sec */
449 case I40E_LOW_LATENCY: 585 avg_wire_size *= 40;
450 if (bytes_per_usec > 20) 586 avg_wire_size += 1696;
451 new_latency_range = I40E_BULK_LATENCY; 587 } else if (avg_wire_size <= 1084) {
452 else if (bytes_per_usec <= 10) 588 /* 60K ints/sec to 36K ints/sec */
453 new_latency_range = I40E_LOWEST_LATENCY; 589 avg_wire_size *= 15;
454 break; 590 avg_wire_size += 11452;
455 case I40E_BULK_LATENCY: 591 } else if (avg_wire_size <= 1980) {
456 default: 592 /* 36K ints/sec to 30K ints/sec */
457 if (bytes_per_usec <= 20) 593 avg_wire_size *= 5;
458 new_latency_range = I40E_LOW_LATENCY; 594 avg_wire_size += 22420;
459 break; 595 } else {
596 /* plateau at a limit of 30K ints/sec */
597 avg_wire_size = 32256;
460 } 598 }
461 599
462reset_latency: 600 /* If we are in low latency mode halve our delay which doubles the
463 rc->latency_range = new_latency_range; 601 * rate to somewhere between 100K to 16K ints/sec
602 */
603 if (itr & I40E_ITR_ADAPTIVE_LATENCY)
604 avg_wire_size /= 2;
464 605
465 switch (new_latency_range) { 606 /* Resultant value is 256 times larger than it needs to be. This
466 case I40E_LOWEST_LATENCY: 607 * gives us room to adjust the value as needed to either increase
467 new_itr = I40E_ITR_50K; 608 * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
468 break; 609 *
469 case I40E_LOW_LATENCY: 610 * Use addition as we have already recorded the new latency flag
470 new_itr = I40E_ITR_20K; 611 * for the ITR value.
471 break; 612 */
472 case I40E_BULK_LATENCY: 613 itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
473 new_itr = I40E_ITR_18K; 614 I40E_ITR_ADAPTIVE_MIN_INC;
474 break; 615
475 default: 616 if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
476 break; 617 itr &= I40E_ITR_ADAPTIVE_LATENCY;
618 itr += I40E_ITR_ADAPTIVE_MAX_USECS;
477 } 619 }
478 620
621clear_counts:
622 /* write back value */
623 rc->target_itr = itr;
624
625 /* next update should occur within next jiffy */
626 rc->next_update = next_update + 1;
627
479 rc->total_bytes = 0; 628 rc->total_bytes = 0;
480 rc->total_packets = 0; 629 rc->total_packets = 0;
481 rc->last_itr_update = jiffies;
482
483 if (new_itr != rc->itr) {
484 rc->itr = new_itr;
485 return true;
486 }
487 return false;
488} 630}
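The "crude approximation" above reduces cleanly for a 10Gbit/s link: desired_pkts_per_int = 212992 / (size + 640), pkt_rate = 10^10 / 8 / (size + 24) (24 bytes covering preamble, FCS and inter-frame gap), so ITR = desired_pkts_per_int / pkt_rate * 10^6 = 212992 * 8 * (size + 24) / (10^4 * (size + 640)) ≈ (170 * (size + 24)) / (size + 640) usecs. The if/else ladder is a piecewise-linear fit of 256 times that curve, and the final DIV_ROUND_UP against i40e_itr_divisor() scales it back down per link speed. A minimal userspace sketch of the mapping follows; the 40G divisor of I40E_ITR_ADAPTIVE_MIN_INC * 1024 is an assumption inferred from the 250K/30K ints/sec labels in the comments, not something visible in this hunk.

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define MIN_INC 2 /* I40E_ITR_ADAPTIVE_MIN_INC */

/* piecewise-linear approximation of 256 * (170*(size+24))/(size+640) */
static unsigned int scaled_itr(unsigned int avg_wire_size)
{
	if (avg_wire_size <= 60)
		return 4096;
	if (avg_wire_size <= 380)
		return avg_wire_size * 40 + 1696;
	if (avg_wire_size <= 1084)
		return avg_wire_size * 15 + 11452;
	if (avg_wire_size <= 1980)
		return avg_wire_size * 5 + 22420;
	return 32256;
}

int main(void)
{
	/* assumed divisor for a 40G link: MIN_INC * 1024 */
	unsigned int divisor = MIN_INC * 1024;
	unsigned int size = 1500;
	unsigned int itr = DIV_ROUND_UP(scaled_itr(size), divisor) * MIN_INC;

	/* 1500*5 + 22420 = 29920; ceil(29920/2048) = 15; 15*2 = 30 usecs,
	 * i.e. roughly 33K ints/sec, matching the ~30K plateau above.
	 */
	printf("%u usecs (~%u ints/sec)\n", itr, 1000000 / itr);
	return 0;
}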
489 631
490/** 632/**
@@ -1273,7 +1415,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
1273 * @rx_buffer: rx buffer to pull data from 1415 * @rx_buffer: rx buffer to pull data from
1274 * 1416 *
1275 * This function will clean up the contents of the rx_buffer. It will 1417 * This function will clean up the contents of the rx_buffer. It will
1276 * either recycle the bufer or unmap it and free the associated resources. 1418 * either recycle the buffer or unmap it and free the associated resources.
1277 */ 1419 */
1278static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, 1420static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
1279 struct i40e_rx_buffer *rx_buffer) 1421 struct i40e_rx_buffer *rx_buffer)
@@ -1457,33 +1599,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
1457 return failure ? budget : (int)total_rx_packets; 1599 return failure ? budget : (int)total_rx_packets;
1458} 1600}
1459 1601
1460static u32 i40e_buildreg_itr(const int type, const u16 itr) 1602static inline u32 i40e_buildreg_itr(const int type, u16 itr)
1461{ 1603{
1462 u32 val; 1604 u32 val;
1463 1605
1606 /* We don't bother with setting the CLEARPBA bit as the data sheet
1607 * points out doing so is "meaningless since it was already
1608 * auto-cleared". The auto-clearing happens when the interrupt is
1609 * asserted.
1610 *
1611 * Hardware errata 28 also indicates that writing to a
1612 * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
1613 * an event in the PBA anyway, so we need to rely on the automask
1614 * to hold pending events for us until the interrupt is re-enabled.
1615 *
1616 * The itr value is reported in microseconds, and the register
1617 * value is recorded in 2 microsecond units. For this reason we
1618 * only need to shift by the interval shift - 1 instead of the
1619 * full value.
1620 */
1621 itr &= I40E_ITR_MASK;
1622
1464 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | 1623 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
1465 I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |
1466 (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | 1624 (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
1467 (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); 1625 (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1));
1468 1626
1469 return val; 1627 return val;
1470} 1628}
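Because the ITR value is now carried around in microseconds while the register's INTERVAL field counts 2-usec units, the halving is folded into the shift: for any even itr, (itr / 2) << SHIFT equals itr << (SHIFT - 1). A minimal sketch of that equivalence; the INTERVAL field offset of 3 used here is an assumption about the register layout, the only requirement being that it is at least 1.

#include <assert.h>
#include <stdint.h>

#define INTERVAL_SHIFT 3        /* assumed offset of the INTERVAL field */
#define ITR_MASK       0x1FFE   /* 13-bit field, bit 0 always clear */

/* old encoding: caller passed a register value (usecs / 2) */
static uint32_t encode_old(uint16_t itr_reg)
{
	return (uint32_t)itr_reg << INTERVAL_SHIFT;
}

/* new encoding: caller passes microseconds; the divide-by-two is
 * folded into the shift, which only works because values are even
 */
static uint32_t encode_new(uint16_t itr_usecs)
{
	itr_usecs &= ITR_MASK;
	return (uint32_t)itr_usecs << (INTERVAL_SHIFT - 1);
}

int main(void)
{
	for (uint16_t usecs = 0; usecs <= 8160; usecs += 2)
		assert(encode_new(usecs) == encode_old(usecs / 2));
	return 0;
}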
1471 1629
1472/* a small macro to shorten up some long lines */ 1630/* a small macro to shorten up some long lines */
1473#define INTREG I40E_VFINT_DYN_CTLN1 1631#define INTREG I40E_VFINT_DYN_CTLN1
1474static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
1475{
1476 struct i40evf_adapter *adapter = vsi->back;
1477 1632
1478 return adapter->rx_rings[idx].rx_itr_setting; 1633/* The act of updating the ITR will cause it to immediately trigger. In order
1479} 1634 * to prevent this from throwing off adaptive update statistics we defer the
1480 1635 * update so that it can only happen so often. So after either Tx or Rx are
1481static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) 1636 * updated we make the adaptive scheme wait until either the ITR completely
1482{ 1637 * expires via the next_update expiration or we have been through at least
1483 struct i40evf_adapter *adapter = vsi->back; 1638 * 3 interrupts.
1484 1639 */
1485 return adapter->tx_rings[idx].tx_itr_setting; 1640#define ITR_COUNTDOWN_START 3
1486}
1487 1641
1488/** 1642/**
1489 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt 1643 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -1495,70 +1649,51 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1495 struct i40e_q_vector *q_vector) 1649 struct i40e_q_vector *q_vector)
1496{ 1650{
1497 struct i40e_hw *hw = &vsi->back->hw; 1651 struct i40e_hw *hw = &vsi->back->hw;
1498 bool rx = false, tx = false; 1652 u32 intval;
1499 u32 rxval, txval;
1500 int idx = q_vector->v_idx;
1501 int rx_itr_setting, tx_itr_setting;
1502
1503 /* avoid dynamic calculation if in countdown mode OR if
1504 * all dynamic is disabled
1505 */
1506 rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
1507
1508 rx_itr_setting = get_rx_itr(vsi, idx);
1509 tx_itr_setting = get_tx_itr(vsi, idx);
1510 1653
1511 if (q_vector->itr_countdown > 0 || 1654 /* These will do nothing if dynamic updates are not enabled */
1512 (!ITR_IS_DYNAMIC(rx_itr_setting) && 1655 i40e_update_itr(q_vector, &q_vector->tx);
1513 !ITR_IS_DYNAMIC(tx_itr_setting))) { 1656 i40e_update_itr(q_vector, &q_vector->rx);
1514 goto enable_int;
1515 }
1516
1517 if (ITR_IS_DYNAMIC(rx_itr_setting)) {
1518 rx = i40e_set_new_dynamic_itr(&q_vector->rx);
1519 rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
1520 }
1521 1657
1522 if (ITR_IS_DYNAMIC(tx_itr_setting)) { 1658 /* This block of logic allows us to get away with only updating
1523 tx = i40e_set_new_dynamic_itr(&q_vector->tx); 1659 * one ITR value with each interrupt. The idea is to perform a
1524 txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); 1660 * pseudo-lazy update with the following criteria.
1525 } 1661 *
1526 1662 * 1. Rx is given higher priority than Tx if both are in same state
1527 if (rx || tx) { 1663 * 2. If we must reduce an ITR that is given highest priority.
1528 /* get the higher of the two ITR adjustments and 1664 * 3. We then give priority to increasing ITR based on amount.
1529 * use the same value for both ITR registers
1530 * when in adaptive mode (Rx and/or Tx)
1531 */
1532 u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
1533
1534 q_vector->tx.itr = q_vector->rx.itr = itr;
1535 txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
1536 tx = true;
1537 rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
1538 rx = true;
1539 }
1540
1541 /* only need to enable the interrupt once, but need
1542 * to possibly update both ITR values
1543 */ 1665 */
1544 if (rx) { 1666 if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
1545 /* set the INTENA_MSK_MASK so that this first write 1667 /* Rx ITR needs to be reduced, this is highest priority */
1546 * won't actually enable the interrupt, instead just 1668 intval = i40e_buildreg_itr(I40E_RX_ITR,
1547 * updating the ITR (it's bit 31 PF and VF) 1669 q_vector->rx.target_itr);
1670 q_vector->rx.current_itr = q_vector->rx.target_itr;
1671 q_vector->itr_countdown = ITR_COUNTDOWN_START;
1672 } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
1673 ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
1674 (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
1675 /* Tx ITR needs to be reduced, this is second priority
1676 * Tx ITR needs to be increased more than Rx, fourth priority
1548 */ 1677 */
1549 rxval |= BIT(31); 1678 intval = i40e_buildreg_itr(I40E_TX_ITR,
1550 /* don't check _DOWN because interrupt isn't being enabled */ 1679 q_vector->tx.target_itr);
1551 wr32(hw, INTREG(q_vector->reg_idx), rxval); 1680 q_vector->tx.current_itr = q_vector->tx.target_itr;
1681 q_vector->itr_countdown = ITR_COUNTDOWN_START;
1682 } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
1683 /* Rx ITR needs to be increased, third priority */
1684 intval = i40e_buildreg_itr(I40E_RX_ITR,
1685 q_vector->rx.target_itr);
1686 q_vector->rx.current_itr = q_vector->rx.target_itr;
1687 q_vector->itr_countdown = ITR_COUNTDOWN_START;
1688 } else {
1689 /* No ITR update, lowest priority */
1690 intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
1691 if (q_vector->itr_countdown)
1692 q_vector->itr_countdown--;
1552 } 1693 }
1553 1694
1554enable_int:
1555 if (!test_bit(__I40E_VSI_DOWN, vsi->state)) 1695 if (!test_bit(__I40E_VSI_DOWN, vsi->state))
1556 wr32(hw, INTREG(q_vector->reg_idx), txval); 1696 wr32(hw, INTREG(q_vector->reg_idx), intval);
1557
1558 if (q_vector->itr_countdown)
1559 q_vector->itr_countdown--;
1560 else
1561 q_vector->itr_countdown = ITR_COUNTDOWN_START;
1562} 1697}
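The branch ordering above encodes the comment's priority list, with the second branch doing double duty: its || catches both "Tx needs reducing" (priority 2) and "Tx wants a larger increase than Rx" (priority 4). The u16 difference comparison is safe because both reduction cases are excluded before it is evaluated, so both differences are non-negative. A minimal sketch of just the selection order, using hypothetical values:

#include <stdio.h>

typedef unsigned short u16;

enum pick { PICK_RX, PICK_TX, PICK_NONE };

/* mirrors the selection in i40e_update_enable_itr: one write per IRQ */
static enum pick pick_itr(u16 rx_cur, u16 rx_tgt, u16 tx_cur, u16 tx_tgt)
{
	if (rx_tgt < rx_cur)
		return PICK_RX;                 /* 1: reduce Rx */
	if (tx_tgt < tx_cur ||
	    (u16)(rx_tgt - rx_cur) < (u16)(tx_tgt - tx_cur))
		return PICK_TX;                 /* 2 and 4: Tx */
	if (rx_cur != rx_tgt)
		return PICK_RX;                 /* 3: increase Rx */
	return PICK_NONE;                       /* no update */
}

int main(void)
{
	/* Rx wants a small increase, Tx a larger one: Tx wins (prio 4) */
	printf("%d\n", pick_itr(50, 60, 50, 100) == PICK_TX);
	/* Rx wants a reduction: Rx wins even though Tx changed (prio 1) */
	printf("%d\n", pick_itr(60, 50, 50, 100) == PICK_RX);
	return 0;
}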
1563 1698
1564/** 1699/**
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 7798a6645c3f..9129447d079b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -28,31 +28,35 @@
28#define _I40E_TXRX_H_ 28#define _I40E_TXRX_H_
29 29
30/* Interrupt Throttling and Rate Limiting Goodies */ 30/* Interrupt Throttling and Rate Limiting Goodies */
31
32#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */
33#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */
34#define I40E_ITR_100K 0x0005
35#define I40E_ITR_50K 0x000A
36#define I40E_ITR_20K 0x0019
37#define I40E_ITR_18K 0x001B
38#define I40E_ITR_8K 0x003E
39#define I40E_ITR_4K 0x007A
40#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
41#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
42 I40E_ITR_DYNAMIC)
43#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
44 I40E_ITR_DYNAMIC)
45#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
46#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */
47#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */
48#define I40E_DEFAULT_IRQ_WORK 256 31#define I40E_DEFAULT_IRQ_WORK 256
49#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1) 32
50#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC)) 33/* The datasheet for the X710 and XL710 indicate that the maximum value for
51#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1) 34 * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec
35 * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
36 * the register value which is divided by 2 lets use the actual values and
37 * avoid an excessive amount of translation.
38 */
39#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
40#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */
41#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */
42#define I40E_ITR_100K 10 /* all values below must be even */
43#define I40E_ITR_50K 20
44#define I40E_ITR_20K 50
45#define I40E_ITR_18K 60
46#define I40E_ITR_8K 122
47#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */
48#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
49#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
50#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
51
52#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
53#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
54
52/* 0x40 is the enable bit for interrupt rate limiting, and must be set if 55/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
53 * the value of the rate limit is non-zero 56 * the value of the rate limit is non-zero
54 */ 57 */
55#define INTRL_ENA BIT(6) 58#define INTRL_ENA BIT(6)
59#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
56#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2) 60#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
57#define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0) 61#define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0)
58#define I40E_INTRL_8K 125 /* 8000 ints/sec */ 62#define I40E_INTRL_8K 125 /* 8000 ints/sec */
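A consequence of storing microseconds directly is that user input only needs rounding, not unit conversion: ITR_REG_ALIGN() rounds a requested value up to the register's 2-usec granularity, using the kernel's __ALIGN_MASK(x, mask), which expands to ((x) + (mask)) & ~(mask). A standalone sketch of the macro arithmetic, with the macros copied locally so it compiles outside the kernel:

#include <assert.h>

#define ITR_MASK               0x1FFE
#define __ALIGN_MASK(x, mask)  (((x) + (mask)) & ~(mask))
#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~ITR_MASK)

int main(void)
{
	assert(ITR_REG_ALIGN(5) == 6);       /* odd usecs round up */
	assert(ITR_REG_ALIGN(50) == 50);     /* even values unchanged */
	assert(ITR_REG_ALIGN(8160) == 8160); /* datasheet max, 0x1FE0 */
	return 0;
}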
@@ -362,8 +366,7 @@ struct i40e_ring {
362 * these values always store the USER setting, and must be converted 366 * these values always store the USER setting, and must be converted
363 * before programming to a register. 367 * before programming to a register.
364 */ 368 */
365 u16 rx_itr_setting; 369 u16 itr_setting;
366 u16 tx_itr_setting;
367 370
368 u16 count; /* Number of descriptors */ 371 u16 count; /* Number of descriptors */
369 u16 reg_idx; /* HW register index of the ring */ 372 u16 reg_idx; /* HW register index of the ring */
@@ -425,21 +428,21 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
425 ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED; 428 ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
426} 429}
427 430
428enum i40e_latency_range { 431#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002
429 I40E_LOWEST_LATENCY = 0, 432#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002
430 I40E_LOW_LATENCY = 1, 433#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e
431 I40E_BULK_LATENCY = 2, 434#define I40E_ITR_ADAPTIVE_LATENCY 0x8000
432}; 435#define I40E_ITR_ADAPTIVE_BULK 0x0000
436#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
433 437
434struct i40e_ring_container { 438struct i40e_ring_container {
435 /* array of pointers to rings */ 439 struct i40e_ring *ring; /* pointer to linked list of ring(s) */
436 struct i40e_ring *ring; 440 unsigned long next_update; /* jiffies value of next update */
437 unsigned int total_bytes; /* total bytes processed this int */ 441 unsigned int total_bytes; /* total bytes processed this int */
438 unsigned int total_packets; /* total packets processed this int */ 442 unsigned int total_packets; /* total packets processed this int */
439 unsigned long last_itr_update; /* jiffies of last ITR update */
440 u16 count; 443 u16 count;
441 enum i40e_latency_range latency_range; 444 u16 target_itr; /* target ITR setting for ring(s) */
442 u16 itr; 445 u16 current_itr; /* current ITR setting for ring(s) */
443}; 446};
444 447
445/* iterator for handling rings in ring container */ 448/* iterator for handling rings in ring container */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 9690c1ea019e..e46555ad7122 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -52,7 +52,10 @@
52#include <linux/socket.h> 52#include <linux/socket.h>
53#include <linux/jiffies.h> 53#include <linux/jiffies.h>
54#include <net/ip6_checksum.h> 54#include <net/ip6_checksum.h>
55#include <net/pkt_cls.h>
55#include <net/udp.h> 56#include <net/udp.h>
57#include <net/tc_act/tc_gact.h>
58#include <net/tc_act/tc_mirred.h>
56 59
57#include "i40e_type.h" 60#include "i40e_type.h"
58#include <linux/avf/virtchnl.h> 61#include <linux/avf/virtchnl.h>
@@ -106,6 +109,7 @@ struct i40e_vsi {
106 109
107#define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4) 110#define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4)
108#define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4) 111#define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4)
112#define I40EVF_MBPS_DIVISOR 125000 /* divisor to convert to Mbps */
109 113
110/* MAX_MSIX_Q_VECTORS of these are allocated, 114/* MAX_MSIX_Q_VECTORS of these are allocated,
111 * but we only use one per queue-specific vector. 115 * but we only use one per queue-specific vector.
@@ -117,9 +121,8 @@ struct i40e_q_vector {
117 struct i40e_ring_container rx; 121 struct i40e_ring_container rx;
118 struct i40e_ring_container tx; 122 struct i40e_ring_container tx;
119 u32 ring_mask; 123 u32 ring_mask;
124 u8 itr_countdown; /* when 0 should adjust adaptive ITR */
120 u8 num_ringpairs; /* total number of ring pairs in vector */ 125 u8 num_ringpairs; /* total number of ring pairs in vector */
121#define ITR_COUNTDOWN_START 100
122 u8 itr_countdown; /* when 0 or 1 update ITR */
123 u16 v_idx; /* index in the vsi->q_vector array. */ 126 u16 v_idx; /* index in the vsi->q_vector array. */
124 u16 reg_idx; /* register index of the interrupt */ 127 u16 reg_idx; /* register index of the interrupt */
125 char name[IFNAMSIZ + 15]; 128 char name[IFNAMSIZ + 15];
@@ -169,6 +172,28 @@ struct i40evf_vlan_filter {
169 bool add; /* filter needs to be added */ 172 bool add; /* filter needs to be added */
170}; 173};
171 174
175#define I40EVF_MAX_TRAFFIC_CLASS 4
176/* State of traffic class creation */
177enum i40evf_tc_state_t {
178 __I40EVF_TC_INVALID, /* no traffic class, default state */
179 __I40EVF_TC_RUNNING, /* traffic classes have been created */
180};
181
182/* channel info */
183struct i40evf_channel_config {
184 struct virtchnl_channel_info ch_info[I40EVF_MAX_TRAFFIC_CLASS];
185 enum i40evf_tc_state_t state;
186 u8 total_qps;
187};
188
189/* State of cloud filter */
190enum i40evf_cloud_filter_state_t {
191 __I40EVF_CF_INVALID, /* cloud filter not added */
192 __I40EVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */
193 __I40EVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */
194 __I40EVF_CF_ACTIVE, /* cloud filter is active */
195};
196
172/* Driver state. The order of these is important! */ 197/* Driver state. The order of these is important! */
173enum i40evf_state_t { 198enum i40evf_state_t {
174 __I40EVF_STARTUP, /* driver loaded, probe complete */ 199 __I40EVF_STARTUP, /* driver loaded, probe complete */
@@ -190,6 +215,36 @@ enum i40evf_critical_section_t {
190 __I40EVF_IN_REMOVE_TASK, /* device being removed */ 215 __I40EVF_IN_REMOVE_TASK, /* device being removed */
191}; 216};
192 217
218#define I40EVF_CLOUD_FIELD_OMAC 0x01
219#define I40EVF_CLOUD_FIELD_IMAC 0x02
220#define I40EVF_CLOUD_FIELD_IVLAN 0x04
221#define I40EVF_CLOUD_FIELD_TEN_ID 0x08
222#define I40EVF_CLOUD_FIELD_IIP 0x10
223
224#define I40EVF_CF_FLAGS_OMAC I40EVF_CLOUD_FIELD_OMAC
225#define I40EVF_CF_FLAGS_IMAC I40EVF_CLOUD_FIELD_IMAC
226#define I40EVF_CF_FLAGS_IMAC_IVLAN (I40EVF_CLOUD_FIELD_IMAC |\
227 I40EVF_CLOUD_FIELD_IVLAN)
228#define I40EVF_CF_FLAGS_IMAC_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\
229 I40EVF_CLOUD_FIELD_TEN_ID)
230#define I40EVF_CF_FLAGS_OMAC_TEN_ID_IMAC (I40EVF_CLOUD_FIELD_OMAC |\
231 I40EVF_CLOUD_FIELD_IMAC |\
232 I40EVF_CLOUD_FIELD_TEN_ID)
233#define I40EVF_CF_FLAGS_IMAC_IVLAN_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\
234 I40EVF_CLOUD_FIELD_IVLAN |\
235 I40EVF_CLOUD_FIELD_TEN_ID)
236#define I40EVF_CF_FLAGS_IIP I40E_CLOUD_FIELD_IIP
237
238/* bookkeeping of cloud filters */
239struct i40evf_cloud_filter {
240 enum i40evf_cloud_filter_state_t state;
241 struct list_head list;
242 struct virtchnl_filter f;
243 unsigned long cookie;
244 bool del; /* filter needs to be deleted */
245 bool add; /* filter needs to be added */
246};
247
193/* board specific private data structure */ 248/* board specific private data structure */
194struct i40evf_adapter { 249struct i40evf_adapter {
195 struct timer_list watchdog_timer; 250 struct timer_list watchdog_timer;
@@ -241,6 +296,7 @@ struct i40evf_adapter {
241#define I40EVF_FLAG_ALLMULTI_ON BIT(14) 296#define I40EVF_FLAG_ALLMULTI_ON BIT(14)
242#define I40EVF_FLAG_LEGACY_RX BIT(15) 297#define I40EVF_FLAG_LEGACY_RX BIT(15)
243#define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(16) 298#define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(16)
299#define I40EVF_FLAG_QUEUES_DISABLED BIT(17)
244/* duplicates for common code */ 300/* duplicates for common code */
245#define I40E_FLAG_DCB_ENABLED 0 301#define I40E_FLAG_DCB_ENABLED 0
246#define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED 302#define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED
@@ -269,6 +325,10 @@ struct i40evf_adapter {
269#define I40EVF_FLAG_AQ_RELEASE_ALLMULTI BIT(18) 325#define I40EVF_FLAG_AQ_RELEASE_ALLMULTI BIT(18)
270#define I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT(19) 326#define I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT(19)
271#define I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT(20) 327#define I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT(20)
328#define I40EVF_FLAG_AQ_ENABLE_CHANNELS BIT(21)
329#define I40EVF_FLAG_AQ_DISABLE_CHANNELS BIT(22)
330#define I40EVF_FLAG_AQ_ADD_CLOUD_FILTER BIT(23)
331#define I40EVF_FLAG_AQ_DEL_CLOUD_FILTER BIT(24)
272 332
273 /* OS defined structs */ 333 /* OS defined structs */
274 struct net_device *netdev; 334 struct net_device *netdev;
@@ -314,6 +374,13 @@ struct i40evf_adapter {
314 u16 rss_lut_size; 374 u16 rss_lut_size;
315 u8 *rss_key; 375 u8 *rss_key;
316 u8 *rss_lut; 376 u8 *rss_lut;
377 /* ADQ related members */
378 struct i40evf_channel_config ch_config;
379 u8 num_tc;
380 struct list_head cloud_filter_list;
381 /* lock to protect access to the cloud filter list */
382 spinlock_t cloud_filter_list_lock;
383 u16 num_cloud_filters;
317}; 384};
318 385
319 386
@@ -380,4 +447,8 @@ void i40evf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len);
380void i40evf_notify_client_l2_params(struct i40e_vsi *vsi); 447void i40evf_notify_client_l2_params(struct i40e_vsi *vsi);
381void i40evf_notify_client_open(struct i40e_vsi *vsi); 448void i40evf_notify_client_open(struct i40e_vsi *vsi);
382void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset); 449void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset);
450void i40evf_enable_channels(struct i40evf_adapter *adapter);
451void i40evf_disable_channels(struct i40evf_adapter *adapter);
452void i40evf_add_cloud_filter(struct i40evf_adapter *adapter);
453void i40evf_del_cloud_filter(struct i40evf_adapter *adapter);
383#endif /* _I40EVF_H_ */ 454#endif /* _I40EVF_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index e2d8aa19d205..e6793255de0b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -457,14 +457,14 @@ static int __i40evf_get_coalesce(struct net_device *netdev,
457 rx_ring = &adapter->rx_rings[queue]; 457 rx_ring = &adapter->rx_rings[queue];
458 tx_ring = &adapter->tx_rings[queue]; 458 tx_ring = &adapter->tx_rings[queue];
459 459
460 if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) 460 if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
461 ec->use_adaptive_rx_coalesce = 1; 461 ec->use_adaptive_rx_coalesce = 1;
462 462
463 if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) 463 if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
464 ec->use_adaptive_tx_coalesce = 1; 464 ec->use_adaptive_tx_coalesce = 1;
465 465
466 ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; 466 ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
467 ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; 467 ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
468 468
469 return 0; 469 return 0;
470} 470}
@@ -502,7 +502,7 @@ static int i40evf_get_per_queue_coalesce(struct net_device *netdev,
502 502
503/** 503/**
504 * i40evf_set_itr_per_queue - set ITR values for specific queue 504 * i40evf_set_itr_per_queue - set ITR values for specific queue
505 * @vsi: the VSI to set values for 505 * @adapter: the VF adapter struct to set values for
506 * @ec: coalesce settings from ethtool 506 * @ec: coalesce settings from ethtool
507 * @queue: the queue to modify 507 * @queue: the queue to modify
508 * 508 *
@@ -514,33 +514,29 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
514{ 514{
515 struct i40e_ring *rx_ring = &adapter->rx_rings[queue]; 515 struct i40e_ring *rx_ring = &adapter->rx_rings[queue];
516 struct i40e_ring *tx_ring = &adapter->tx_rings[queue]; 516 struct i40e_ring *tx_ring = &adapter->tx_rings[queue];
517 struct i40e_vsi *vsi = &adapter->vsi;
518 struct i40e_hw *hw = &adapter->hw;
519 struct i40e_q_vector *q_vector; 517 struct i40e_q_vector *q_vector;
520 u16 vector;
521 518
522 rx_ring->rx_itr_setting = ec->rx_coalesce_usecs; 519 rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
523 tx_ring->tx_itr_setting = ec->tx_coalesce_usecs; 520 tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
524 521
525 rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC; 522 rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
526 if (!ec->use_adaptive_rx_coalesce) 523 if (!ec->use_adaptive_rx_coalesce)
527 rx_ring->rx_itr_setting ^= I40E_ITR_DYNAMIC; 524 rx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
528 525
529 tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC; 526 tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
530 if (!ec->use_adaptive_tx_coalesce) 527 if (!ec->use_adaptive_tx_coalesce)
531 tx_ring->tx_itr_setting ^= I40E_ITR_DYNAMIC; 528 tx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
532 529
533 q_vector = rx_ring->q_vector; 530 q_vector = rx_ring->q_vector;
534 q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); 531 q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
535 vector = vsi->base_vector + q_vector->v_idx;
536 wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
537 532
538 q_vector = tx_ring->q_vector; 533 q_vector = tx_ring->q_vector;
539 q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); 534 q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
540 vector = vsi->base_vector + q_vector->v_idx;
541 wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
542 535
543 i40e_flush(hw); 536 /* The interrupt handler itself will take care of programming
537 * the Tx and Rx ITR values based on the values we have entered
538 * into the q_vector; there is no need to write the values now.
539 */
544} 540}
545 541
546/** 542/**
@@ -565,8 +561,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev,
565 if (ec->rx_coalesce_usecs == 0) { 561 if (ec->rx_coalesce_usecs == 0) {
566 if (ec->use_adaptive_rx_coalesce) 562 if (ec->use_adaptive_rx_coalesce)
567 netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); 563 netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
568 } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) || 564 } else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) ||
569 (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) { 565 (ec->rx_coalesce_usecs > I40E_MAX_ITR)) {
570 netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); 566 netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
571 return -EINVAL; 567 return -EINVAL;
572 } 568 }
@@ -575,8 +571,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev,
575 if (ec->tx_coalesce_usecs == 0) { 571 if (ec->tx_coalesce_usecs == 0) {
576 if (ec->use_adaptive_tx_coalesce) 572 if (ec->use_adaptive_tx_coalesce)
577 netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); 573 netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
578 } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) || 574 } else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) ||
579 (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) { 575 (ec->tx_coalesce_usecs > I40E_MAX_ITR)) {
580 netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); 576 netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
581 return -EINVAL; 577 return -EINVAL;
582 } 578 }
@@ -699,6 +695,12 @@ static int i40evf_set_channels(struct net_device *netdev,
699 return -EINVAL; 695 return -EINVAL;
700 } 696 }
701 697
698 if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
699 adapter->num_tc) {
700 dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n");
701 return -EINVAL;
702 }
703
702 /* All of these should have already been checked by ethtool before this 704 /* All of these should have already been checked by ethtool before this
703 * even gets to us, but just to be sure. 705 * even gets to us, but just to be sure.
704 */ 706 */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 16989ad2ca90..4955ce3ab6a2 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -353,11 +353,12 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
353 rx_ring->vsi = &adapter->vsi; 353 rx_ring->vsi = &adapter->vsi;
354 q_vector->rx.ring = rx_ring; 354 q_vector->rx.ring = rx_ring;
355 q_vector->rx.count++; 355 q_vector->rx.count++;
356 q_vector->rx.latency_range = I40E_LOW_LATENCY; 356 q_vector->rx.next_update = jiffies + 1;
357 q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); 357 q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
358 q_vector->ring_mask |= BIT(r_idx); 358 q_vector->ring_mask |= BIT(r_idx);
359 q_vector->itr_countdown = ITR_COUNTDOWN_START; 359 wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),
360 wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr); 360 q_vector->rx.current_itr);
361 q_vector->rx.current_itr = q_vector->rx.target_itr;
361} 362}
362 363
363/** 364/**
@@ -378,11 +379,12 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
378 tx_ring->vsi = &adapter->vsi; 379 tx_ring->vsi = &adapter->vsi;
379 q_vector->tx.ring = tx_ring; 380 q_vector->tx.ring = tx_ring;
380 q_vector->tx.count++; 381 q_vector->tx.count++;
381 q_vector->tx.latency_range = I40E_LOW_LATENCY; 382 q_vector->tx.next_update = jiffies + 1;
382 q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); 383 q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
383 q_vector->itr_countdown = ITR_COUNTDOWN_START;
384 q_vector->num_ringpairs++; 384 q_vector->num_ringpairs++;
385 wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr); 385 wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),
386 q_vector->tx.target_itr);
387 q_vector->tx.current_itr = q_vector->tx.target_itr;
386} 388}
387 389
388/** 390/**
@@ -783,7 +785,7 @@ static int i40evf_vlan_rx_kill_vid(struct net_device *netdev,
783 **/ 785 **/
784static struct 786static struct
785i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter, 787i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter,
786 u8 *macaddr) 788 const u8 *macaddr)
787{ 789{
788 struct i40evf_mac_filter *f; 790 struct i40evf_mac_filter *f;
789 791
@@ -806,7 +808,7 @@ i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter,
806 **/ 808 **/
807static struct 809static struct
808i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter, 810i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
809 u8 *macaddr) 811 const u8 *macaddr)
810{ 812{
811 struct i40evf_mac_filter *f; 813 struct i40evf_mac_filter *f;
812 814
@@ -878,50 +880,64 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
878} 880}
879 881
880/** 882/**
881 * i40evf_set_rx_mode - NDO callback to set the netdev filters 883 * i40evf_addr_sync - Callback for dev_(mc|uc)_sync to add address
882 * @netdev: network interface device structure 884 * @netdev: the netdevice
883 **/ 885 * @addr: address to add
884static void i40evf_set_rx_mode(struct net_device *netdev) 886 *
887 * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
888 * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
889 */
890static int i40evf_addr_sync(struct net_device *netdev, const u8 *addr)
885{ 891{
886 struct i40evf_adapter *adapter = netdev_priv(netdev); 892 struct i40evf_adapter *adapter = netdev_priv(netdev);
887 struct i40evf_mac_filter *f, *ftmp;
888 struct netdev_hw_addr *uca;
889 struct netdev_hw_addr *mca;
890 struct netdev_hw_addr *ha;
891
892 /* add addr if not already in the filter list */
893 netdev_for_each_uc_addr(uca, netdev) {
894 i40evf_add_filter(adapter, uca->addr);
895 }
896 netdev_for_each_mc_addr(mca, netdev) {
897 i40evf_add_filter(adapter, mca->addr);
898 }
899
900 spin_lock_bh(&adapter->mac_vlan_list_lock);
901
902 list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
903 netdev_for_each_mc_addr(mca, netdev)
904 if (ether_addr_equal(mca->addr, f->macaddr))
905 goto bottom_of_search_loop;
906 893
907 netdev_for_each_uc_addr(uca, netdev) 894 if (i40evf_add_filter(adapter, addr))
908 if (ether_addr_equal(uca->addr, f->macaddr)) 895 return 0;
909 goto bottom_of_search_loop; 896 else
897 return -ENOMEM;
898}
910 899
911 for_each_dev_addr(netdev, ha) 900/**
912 if (ether_addr_equal(ha->addr, f->macaddr)) 901 * i40evf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
913 goto bottom_of_search_loop; 902 * @netdev: the netdevice
903 * @addr: address to remove
904 *
905 * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
906 * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
907 */
908static int i40evf_addr_unsync(struct net_device *netdev, const u8 *addr)
909{
910 struct i40evf_adapter *adapter = netdev_priv(netdev);
911 struct i40evf_mac_filter *f;
914 912
915 if (ether_addr_equal(f->macaddr, adapter->hw.mac.addr)) 913 /* Under some circumstances, we might receive a request to delete
916 goto bottom_of_search_loop; 914 * our own device address from our uc list. Because we store the
915 * device address in the VSI's MAC/VLAN filter list, we need to ignore
916 * such requests and not delete our device address from this list.
917 */
918 if (ether_addr_equal(addr, netdev->dev_addr))
919 return 0;
917 920
918 /* f->macaddr wasn't found in uc, mc, or ha list so delete it */ 921 f = i40evf_find_filter(adapter, addr);
922 if (f) {
919 f->remove = true; 923 f->remove = true;
920 adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER; 924 adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
921
922bottom_of_search_loop:
923 continue;
924 } 925 }
926 return 0;
927}
928
929/**
930 * i40evf_set_rx_mode - NDO callback to set the netdev filters
931 * @netdev: network interface device structure
932 **/
933static void i40evf_set_rx_mode(struct net_device *netdev)
934{
935 struct i40evf_adapter *adapter = netdev_priv(netdev);
936
937 spin_lock_bh(&adapter->mac_vlan_list_lock);
938 __dev_uc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
939 __dev_mc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
940 spin_unlock_bh(&adapter->mac_vlan_list_lock);
925 941
926 if (netdev->flags & IFF_PROMISC && 942 if (netdev->flags & IFF_PROMISC &&
927 !(adapter->flags & I40EVF_FLAG_PROMISC_ON)) 943 !(adapter->flags & I40EVF_FLAG_PROMISC_ON))
@@ -936,8 +952,6 @@ bottom_of_search_loop:
936 else if (!(netdev->flags & IFF_ALLMULTI) && 952 else if (!(netdev->flags & IFF_ALLMULTI) &&
937 adapter->flags & I40EVF_FLAG_ALLMULTI_ON) 953 adapter->flags & I40EVF_FLAG_ALLMULTI_ON)
938 adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_ALLMULTI; 954 adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_ALLMULTI;
939
940 spin_unlock_bh(&adapter->mac_vlan_list_lock);
941} 955}
942 956
943/** 957/**
@@ -1025,7 +1039,9 @@ static void i40evf_up_complete(struct i40evf_adapter *adapter)
1025void i40evf_down(struct i40evf_adapter *adapter) 1039void i40evf_down(struct i40evf_adapter *adapter)
1026{ 1040{
1027 struct net_device *netdev = adapter->netdev; 1041 struct net_device *netdev = adapter->netdev;
1042 struct i40evf_vlan_filter *vlf;
1028 struct i40evf_mac_filter *f; 1043 struct i40evf_mac_filter *f;
1044 struct i40evf_cloud_filter *cf;
1029 1045
1030 if (adapter->state <= __I40EVF_DOWN_PENDING) 1046 if (adapter->state <= __I40EVF_DOWN_PENDING)
1031 return; 1047 return;
@@ -1038,17 +1054,29 @@ void i40evf_down(struct i40evf_adapter *adapter)
1038 1054
1039 spin_lock_bh(&adapter->mac_vlan_list_lock); 1055 spin_lock_bh(&adapter->mac_vlan_list_lock);
1040 1056
1057 /* clear the sync flag on all filters */
1058 __dev_uc_unsync(adapter->netdev, NULL);
1059 __dev_mc_unsync(adapter->netdev, NULL);
1060
1041 /* remove all MAC filters */ 1061 /* remove all MAC filters */
1042 list_for_each_entry(f, &adapter->mac_filter_list, list) { 1062 list_for_each_entry(f, &adapter->mac_filter_list, list) {
1043 f->remove = true; 1063 f->remove = true;
1044 } 1064 }
1065
1045 /* remove all VLAN filters */ 1066 /* remove all VLAN filters */
1046 list_for_each_entry(f, &adapter->vlan_filter_list, list) { 1067 list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
1047 f->remove = true; 1068 vlf->remove = true;
1048 } 1069 }
1049 1070
1050 spin_unlock_bh(&adapter->mac_vlan_list_lock); 1071 spin_unlock_bh(&adapter->mac_vlan_list_lock);
1051 1072
1073 /* remove all cloud filters */
1074 spin_lock_bh(&adapter->cloud_filter_list_lock);
1075 list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
1076 cf->del = true;
1077 }
1078 spin_unlock_bh(&adapter->cloud_filter_list_lock);
1079
1052 if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) && 1080 if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) &&
1053 adapter->state != __I40EVF_RESETTING) { 1081 adapter->state != __I40EVF_RESETTING) {
1054 /* cancel any current operation */ 1082 /* cancel any current operation */
@@ -1059,6 +1087,7 @@ void i40evf_down(struct i40evf_adapter *adapter)
1059 */ 1087 */
1060 adapter->aq_required = I40EVF_FLAG_AQ_DEL_MAC_FILTER; 1088 adapter->aq_required = I40EVF_FLAG_AQ_DEL_MAC_FILTER;
1061 adapter->aq_required |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER; 1089 adapter->aq_required |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
1090 adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
1062 adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES; 1091 adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
1063 } 1092 }
1064 1093
@@ -1144,6 +1173,9 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
1144 */ 1173 */
1145 if (adapter->num_req_queues) 1174 if (adapter->num_req_queues)
1146 num_active_queues = adapter->num_req_queues; 1175 num_active_queues = adapter->num_req_queues;
1176 else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
1177 adapter->num_tc)
1178 num_active_queues = adapter->ch_config.total_qps;
1147 else 1179 else
1148 num_active_queues = min_t(int, 1180 num_active_queues = min_t(int,
1149 adapter->vsi_res->num_queue_pairs, 1181 adapter->vsi_res->num_queue_pairs,
@@ -1169,7 +1201,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
1169 tx_ring->netdev = adapter->netdev; 1201 tx_ring->netdev = adapter->netdev;
1170 tx_ring->dev = &adapter->pdev->dev; 1202 tx_ring->dev = &adapter->pdev->dev;
1171 tx_ring->count = adapter->tx_desc_count; 1203 tx_ring->count = adapter->tx_desc_count;
1172 tx_ring->tx_itr_setting = I40E_ITR_TX_DEF; 1204 tx_ring->itr_setting = I40E_ITR_TX_DEF;
1173 if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE) 1205 if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE)
1174 tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; 1206 tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
1175 1207
@@ -1178,7 +1210,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
1178 rx_ring->netdev = adapter->netdev; 1210 rx_ring->netdev = adapter->netdev;
1179 rx_ring->dev = &adapter->pdev->dev; 1211 rx_ring->dev = &adapter->pdev->dev;
1180 rx_ring->count = adapter->rx_desc_count; 1212 rx_ring->count = adapter->rx_desc_count;
1181 rx_ring->rx_itr_setting = I40E_ITR_RX_DEF; 1213 rx_ring->itr_setting = I40E_ITR_RX_DEF;
1182 } 1214 }
1183 1215
1184 adapter->num_active_queues = num_active_queues; 1216 adapter->num_active_queues = num_active_queues;
@@ -1471,6 +1503,16 @@ int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter)
1471 goto err_alloc_q_vectors; 1503 goto err_alloc_q_vectors;
1472 } 1504 }
1473 1505
1506 /* If we've made it this far with the ADq flag ON, then we haven't
1507 * bailed out anywhere in the middle. And ADq isn't just enabled but actual
1508 * resources have been allocated in the reset path.
1509 * Now we can truly claim that ADq is enabled.
1510 */
1511 if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
1512 adapter->num_tc)
1513 dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created",
1514 adapter->num_tc);
1515
1474 dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u", 1516 dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u",
1475 (adapter->num_active_queues > 1) ? "Enabled" : "Disabled", 1517 (adapter->num_active_queues > 1) ? "Enabled" : "Disabled",
1476 adapter->num_active_queues); 1518 adapter->num_active_queues);
@@ -1712,6 +1754,27 @@ static void i40evf_watchdog_task(struct work_struct *work)
1712 i40evf_set_promiscuous(adapter, 0); 1754 i40evf_set_promiscuous(adapter, 0);
1713 goto watchdog_done; 1755 goto watchdog_done;
1714 } 1756 }
1757
1758 if (adapter->aq_required & I40EVF_FLAG_AQ_ENABLE_CHANNELS) {
1759 i40evf_enable_channels(adapter);
1760 goto watchdog_done;
1761 }
1762
1763 if (adapter->aq_required & I40EVF_FLAG_AQ_DISABLE_CHANNELS) {
1764 i40evf_disable_channels(adapter);
1765 goto watchdog_done;
1766 }
1767
1768 if (adapter->aq_required & I40EVF_FLAG_AQ_ADD_CLOUD_FILTER) {
1769 i40evf_add_cloud_filter(adapter);
1770 goto watchdog_done;
1771 }
1772
1773 if (adapter->aq_required & I40EVF_FLAG_AQ_DEL_CLOUD_FILTER) {
1774 i40evf_del_cloud_filter(adapter);
1775 goto watchdog_done;
1776 }
1777
1715 schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5)); 1778 schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
1716 1779
1717 if (adapter->state == __I40EVF_RUNNING) 1780 if (adapter->state == __I40EVF_RUNNING)
@@ -1735,6 +1798,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
1735{ 1798{
1736 struct i40evf_mac_filter *f, *ftmp; 1799 struct i40evf_mac_filter *f, *ftmp;
1737 struct i40evf_vlan_filter *fv, *fvtmp; 1800 struct i40evf_vlan_filter *fv, *fvtmp;
1801 struct i40evf_cloud_filter *cf, *cftmp;
1738 1802
1739 adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED; 1803 adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
1740 1804
@@ -1756,7 +1820,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
1756 1820
1757 spin_lock_bh(&adapter->mac_vlan_list_lock); 1821 spin_lock_bh(&adapter->mac_vlan_list_lock);
1758 1822
1759 /* Delete all of the filters, both MAC and VLAN. */ 1823 /* Delete all of the filters */
1760 list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { 1824 list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
1761 list_del(&f->list); 1825 list_del(&f->list);
1762 kfree(f); 1826 kfree(f);
@@ -1769,6 +1833,14 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
1769 1833
1770 spin_unlock_bh(&adapter->mac_vlan_list_lock); 1834 spin_unlock_bh(&adapter->mac_vlan_list_lock);
1771 1835
1836 spin_lock_bh(&adapter->cloud_filter_list_lock);
1837 list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
1838 list_del(&cf->list);
1839 kfree(cf);
1840 adapter->num_cloud_filters--;
1841 }
1842 spin_unlock_bh(&adapter->cloud_filter_list_lock);
1843
1772 i40evf_free_misc_irq(adapter); 1844 i40evf_free_misc_irq(adapter);
1773 i40evf_reset_interrupt_capability(adapter); 1845 i40evf_reset_interrupt_capability(adapter);
1774 i40evf_free_queues(adapter); 1846 i40evf_free_queues(adapter);
@@ -1798,9 +1870,11 @@ static void i40evf_reset_task(struct work_struct *work)
1798 struct i40evf_adapter *adapter = container_of(work, 1870 struct i40evf_adapter *adapter = container_of(work,
1799 struct i40evf_adapter, 1871 struct i40evf_adapter,
1800 reset_task); 1872 reset_task);
1873 struct virtchnl_vf_resource *vfres = adapter->vf_res;
1801 struct net_device *netdev = adapter->netdev; 1874 struct net_device *netdev = adapter->netdev;
1802 struct i40e_hw *hw = &adapter->hw; 1875 struct i40e_hw *hw = &adapter->hw;
1803 struct i40evf_vlan_filter *vlf; 1876 struct i40evf_vlan_filter *vlf;
1877 struct i40evf_cloud_filter *cf;
1804 struct i40evf_mac_filter *f; 1878 struct i40evf_mac_filter *f;
1805 u32 reg_val; 1879 u32 reg_val;
1806 int i = 0, err; 1880 int i = 0, err;
@@ -1893,6 +1967,7 @@ continue_reset:
1893 i40evf_free_all_rx_resources(adapter); 1967 i40evf_free_all_rx_resources(adapter);
1894 i40evf_free_all_tx_resources(adapter); 1968 i40evf_free_all_tx_resources(adapter);
1895 1969
1970 adapter->flags |= I40EVF_FLAG_QUEUES_DISABLED;
1896 /* kill and reinit the admin queue */ 1971 /* kill and reinit the admin queue */
1897 i40evf_shutdown_adminq(hw); 1972 i40evf_shutdown_adminq(hw);
1898 adapter->current_op = VIRTCHNL_OP_UNKNOWN; 1973 adapter->current_op = VIRTCHNL_OP_UNKNOWN;
@@ -1924,8 +1999,19 @@ continue_reset:
1924 1999
1925 spin_unlock_bh(&adapter->mac_vlan_list_lock); 2000 spin_unlock_bh(&adapter->mac_vlan_list_lock);
1926 2001
2002 /* check if TCs are running and re-add all cloud filters */
2003 spin_lock_bh(&adapter->cloud_filter_list_lock);
2004 if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
2005 adapter->num_tc) {
2006 list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
2007 cf->add = true;
2008 }
2009 }
2010 spin_unlock_bh(&adapter->cloud_filter_list_lock);
2011
1927 adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER; 2012 adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
1928 adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER; 2013 adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
2014 adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
1929 i40evf_misc_irq_enable(adapter); 2015 i40evf_misc_irq_enable(adapter);
1930 2016
1931 mod_timer(&adapter->watchdog_timer, jiffies + 2); 2017 mod_timer(&adapter->watchdog_timer, jiffies + 2);
@@ -2191,6 +2277,724 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter)
2191} 2277}
2192 2278
2193/** 2279/**
2280 * i40evf_validate_tx_bandwidth - validate the max Tx bandwidth
2281 * @adapter: board private structure
2282 * @max_tx_rate: max Tx bw for a tc
2283 **/
2284static int i40evf_validate_tx_bandwidth(struct i40evf_adapter *adapter,
2285 u64 max_tx_rate)
2286{
2287 int speed = 0, ret = 0;
2288
2289 switch (adapter->link_speed) {
2290 case I40E_LINK_SPEED_40GB:
2291 speed = 40000;
2292 break;
2293 case I40E_LINK_SPEED_25GB:
2294 speed = 25000;
2295 break;
2296 case I40E_LINK_SPEED_20GB:
2297 speed = 20000;
2298 break;
2299 case I40E_LINK_SPEED_10GB:
2300 speed = 10000;
2301 break;
2302 case I40E_LINK_SPEED_1GB:
2303 speed = 1000;
2304 break;
2305 case I40E_LINK_SPEED_100MB:
2306 speed = 100;
2307 break;
2308 default:
2309 break;
2310 }
2311
2312 if (max_tx_rate > speed) {
2313 dev_err(&adapter->pdev->dev,
2314 "Invalid tx rate specified\n");
2315 ret = -EINVAL;
2316 }
2317
2318 return ret;
2319}
2320
2321/**
2322 * i40evf_validate_ch_config - validate queue mapping info
2323 * @adapter: board private structure
2324 * @mqprio_qopt: queue parameters
2325 *
2326 * This function validates if the config provided by the user to
2327 * configure queue channels is valid or not. Returns 0 on a valid
2328 * config.
2329 **/
2330static int i40evf_validate_ch_config(struct i40evf_adapter *adapter,
2331 struct tc_mqprio_qopt_offload *mqprio_qopt)
2332{
2333 u64 total_max_rate = 0;
2334 int i, num_qps = 0;
2335 u64 tx_rate = 0;
2336 int ret = 0;
2337
2338 if (mqprio_qopt->qopt.num_tc > I40EVF_MAX_TRAFFIC_CLASS ||
2339 mqprio_qopt->qopt.num_tc < 1)
2340 return -EINVAL;
2341
2342 for (i = 0; i <= mqprio_qopt->qopt.num_tc - 1; i++) {
2343 if (!mqprio_qopt->qopt.count[i] ||
2344 mqprio_qopt->qopt.offset[i] != num_qps)
2345 return -EINVAL;
2346 if (mqprio_qopt->min_rate[i]) {
2347 dev_err(&adapter->pdev->dev,
2348 "Invalid min tx rate (greater than 0) specified\n");
2349 return -EINVAL;
2350 }
2351 /* convert to Mbps */
2352 tx_rate = div_u64(mqprio_qopt->max_rate[i],
2353 I40EVF_MBPS_DIVISOR);
2354 total_max_rate += tx_rate;
2355 num_qps += mqprio_qopt->qopt.count[i];
2356 }
2357 if (num_qps > MAX_QUEUES)
2358 return -EINVAL;
2359
2360 ret = i40evf_validate_tx_bandwidth(adapter, total_max_rate);
2361 return ret;
2362}
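The unit handling here is worth spelling out: mqprio passes max_rate in bytes per second, and one Mbit/s is 125000 bytes/s, hence I40EVF_MBPS_DIVISOR. The summed per-TC maxima must then fit under the negotiated link speed in Mbps. A worked check with hypothetical rates:

#include <stdio.h>

#define MBPS_DIVISOR 125000ULL /* bytes/sec in 1 Mbit */

int main(void)
{
	/* e.g. two TCs capped at 2.5 Gbit/s and 5 Gbit/s by tc mqprio */
	unsigned long long max_rate[2] = { 312500000ULL, 625000000ULL };
	unsigned long long total_mbps = 0;
	unsigned int link_mbps = 10000; /* 10G link */

	for (int i = 0; i < 2; i++)
		total_mbps += max_rate[i] / MBPS_DIVISOR;

	/* 2500 + 5000 = 7500 Mbps <= 10000 Mbps: config accepted */
	printf("%llu Mbps -> %s\n", total_mbps,
	       total_mbps > link_mbps ? "rejected" : "accepted");
	return 0;
}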
2363
2364/**
2365 * i40evf_del_all_cloud_filters - delete all cloud filters
2366 * on the traffic classes
2367 **/
2368static void i40evf_del_all_cloud_filters(struct i40evf_adapter *adapter)
2369{
2370 struct i40evf_cloud_filter *cf, *cftmp;
2371
2372 spin_lock_bh(&adapter->cloud_filter_list_lock);
2373 list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
2374 list) {
2375 list_del(&cf->list);
2376 kfree(cf);
2377 adapter->num_cloud_filters--;
2378 }
2379 spin_unlock_bh(&adapter->cloud_filter_list_lock);
2380}
2381
2382/**
2383 * __i40evf_setup_tc - configure multiple traffic classes
2384 * @netdev: network interface device structure
2385 * @type_data: tc offload data
2386 *
2387 * This function processes the config information provided by the
2388 * user to configure traffic classes/queue channels and packages the
2389 * information to request the PF to setup traffic classes.
2390 *
2391 * Returns 0 on success.
2392 **/
2393static int __i40evf_setup_tc(struct net_device *netdev, void *type_data)
2394{
2395 struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
2396 struct i40evf_adapter *adapter = netdev_priv(netdev);
2397 struct virtchnl_vf_resource *vfres = adapter->vf_res;
2398 u8 num_tc = 0, total_qps = 0;
2399 int ret = 0, netdev_tc = 0;
2400 u64 max_tx_rate;
2401 u16 mode;
2402 int i;
2403
2404 num_tc = mqprio_qopt->qopt.num_tc;
2405 mode = mqprio_qopt->mode;
2406
2407 /* delete queue_channel */
2408 if (!mqprio_qopt->qopt.hw) {
2409 if (adapter->ch_config.state == __I40EVF_TC_RUNNING) {
2410 /* reset the tc configuration */
2411 netdev_reset_tc(netdev);
2412 adapter->num_tc = 0;
2413 netif_tx_stop_all_queues(netdev);
2414 netif_tx_disable(netdev);
2415 i40evf_del_all_cloud_filters(adapter);
2416 adapter->aq_required = I40EVF_FLAG_AQ_DISABLE_CHANNELS;
2417 goto exit;
2418 } else {
2419 return -EINVAL;
2420 }
2421 }
2422
2423 /* add queue channel */
2424 if (mode == TC_MQPRIO_MODE_CHANNEL) {
2425 if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) {
2426 dev_err(&adapter->pdev->dev, "ADq not supported\n");
2427 return -EOPNOTSUPP;
2428 }
2429 if (adapter->ch_config.state != __I40EVF_TC_INVALID) {
2430 dev_err(&adapter->pdev->dev, "TC configuration already exists\n");
2431 return -EINVAL;
2432 }
2433
2434 ret = i40evf_validate_ch_config(adapter, mqprio_qopt);
2435 if (ret)
2436 return ret;
2437 /* Return if same TC config is requested */
2438 if (adapter->num_tc == num_tc)
2439 return 0;
2440 adapter->num_tc = num_tc;
2441
2442 for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
2443 if (i < num_tc) {
2444 adapter->ch_config.ch_info[i].count =
2445 mqprio_qopt->qopt.count[i];
2446 adapter->ch_config.ch_info[i].offset =
2447 mqprio_qopt->qopt.offset[i];
2448 total_qps += mqprio_qopt->qopt.count[i];
2449 max_tx_rate = mqprio_qopt->max_rate[i];
2450 /* convert to Mbps */
2451 max_tx_rate = div_u64(max_tx_rate,
2452 I40EVF_MBPS_DIVISOR);
2453 adapter->ch_config.ch_info[i].max_tx_rate =
2454 max_tx_rate;
2455 } else {
2456 adapter->ch_config.ch_info[i].count = 1;
2457 adapter->ch_config.ch_info[i].offset = 0;
2458 }
2459 }
2460 adapter->ch_config.total_qps = total_qps;
2461 netif_tx_stop_all_queues(netdev);
2462 netif_tx_disable(netdev);
2463 adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_CHANNELS;
2464 netdev_reset_tc(netdev);
2465 /* Report the tc mapping up the stack */
2466 netdev_set_num_tc(adapter->netdev, num_tc);
2467 for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
2468 u16 qcount = mqprio_qopt->qopt.count[i];
2469 u16 qoffset = mqprio_qopt->qopt.offset[i];
2470
2471 if (i < num_tc)
2472 netdev_set_tc_queue(netdev, netdev_tc++, qcount,
2473 qoffset);
2474 }
2475 }
2476exit:
2477 return ret;
2478}
2479
2480/**
2481 * i40evf_parse_cls_flower - Parse tc flower filters provided by kernel
2482 * @adapter: board private structure
2483 * @f: pointer to struct tc_cls_flower_offload
2484 * @filter: pointer to cloud filter structure
2485 */
2486static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
2487 struct tc_cls_flower_offload *f,
2488 struct i40evf_cloud_filter *filter)
2489{
2490 u16 n_proto_mask = 0;
2491 u16 n_proto_key = 0;
2492 u8 field_flags = 0;
2493 u16 addr_type = 0;
2494 u16 n_proto = 0;
2495 int i = 0;
2496
2497 if (f->dissector->used_keys &
2498 ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2499 BIT(FLOW_DISSECTOR_KEY_BASIC) |
2500 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2501 BIT(FLOW_DISSECTOR_KEY_VLAN) |
2502 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2503 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2504 BIT(FLOW_DISSECTOR_KEY_PORTS) |
2505 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
2506 dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n",
2507 f->dissector->used_keys);
2508 return -EOPNOTSUPP;
2509 }
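The used_keys check just above is a whitelist: the filter is rejected outright if the classifier matched on any dissector key the driver cannot express in a cloud filter. A toy illustration of the bitmask guard, with stand-in enum values rather than the real FLOW_DISSECTOR_KEY_* numbering:

#include <stdio.h>

/* hypothetical stand-ins for the FLOW_DISSECTOR_KEY_* values */
enum { KEY_CONTROL, KEY_BASIC, KEY_ETH_ADDRS, KEY_VLAN, KEY_IPV4_ADDRS,
       KEY_IPV6_ADDRS, KEY_PORTS, KEY_ENC_KEYID, KEY_ICMP };
#define BIT(n) (1U << (n))

int main(void)
{
	unsigned int supported = BIT(KEY_CONTROL) | BIT(KEY_BASIC) |
				 BIT(KEY_ETH_ADDRS) | BIT(KEY_VLAN) |
				 BIT(KEY_IPV4_ADDRS) | BIT(KEY_IPV6_ADDRS) |
				 BIT(KEY_PORTS) | BIT(KEY_ENC_KEYID);
	unsigned int used = BIT(KEY_BASIC) | BIT(KEY_IPV4_ADDRS);

	printf("%s\n", (used & ~supported) ? "-EOPNOTSUPP" : "ok");
	used |= BIT(KEY_ICMP); /* an unsupported match rejects the filter */
	printf("%s\n", (used & ~supported) ? "-EOPNOTSUPP" : "ok");
	return 0;
}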
2510
2511 if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
2512 struct flow_dissector_key_keyid *mask =
2513 skb_flow_dissector_target(f->dissector,
2514 FLOW_DISSECTOR_KEY_ENC_KEYID,
2515 f->mask);
2516
2517 if (mask->keyid != 0)
2518 field_flags |= I40EVF_CLOUD_FIELD_TEN_ID;
2519 }
2520
2521 if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
2522 struct flow_dissector_key_basic *key =
2523 skb_flow_dissector_target(f->dissector,
2524 FLOW_DISSECTOR_KEY_BASIC,
2525 f->key);
2526
2527 struct flow_dissector_key_basic *mask =
2528 skb_flow_dissector_target(f->dissector,
2529 FLOW_DISSECTOR_KEY_BASIC,
2530 f->mask);
2531 n_proto_key = ntohs(key->n_proto);
2532 n_proto_mask = ntohs(mask->n_proto);
2533
2534 if (n_proto_key == ETH_P_ALL) {
2535 n_proto_key = 0;
2536 n_proto_mask = 0;
2537 }
2538 n_proto = n_proto_key & n_proto_mask;
2539 if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6)
2540 return -EINVAL;
2541 if (n_proto == ETH_P_IPV6) {
2542 /* specify flow type as TCP IPv6 */
2543 filter->f.flow_type = VIRTCHNL_TCP_V6_FLOW;
2544 }
2545
2546 if (key->ip_proto != IPPROTO_TCP) {
2547 dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n");
2548 return -EINVAL;
2549 }
2550 }
2551
2552 if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2553 struct flow_dissector_key_eth_addrs *key =
2554 skb_flow_dissector_target(f->dissector,
2555 FLOW_DISSECTOR_KEY_ETH_ADDRS,
2556 f->key);
2557
2558 struct flow_dissector_key_eth_addrs *mask =
2559 skb_flow_dissector_target(f->dissector,
2560 FLOW_DISSECTOR_KEY_ETH_ADDRS,
2561 f->mask);
2562 /* use is_broadcast and is_zero to check for all 0xf or 0 */
2563 if (!is_zero_ether_addr(mask->dst)) {
2564 if (is_broadcast_ether_addr(mask->dst)) {
2565 field_flags |= I40EVF_CLOUD_FIELD_OMAC;
2566 } else {
2567 dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n",
2568 mask->dst);
2569 return I40E_ERR_CONFIG;
2570 }
2571 }
2572
2573 if (!is_zero_ether_addr(mask->src)) {
2574 if (is_broadcast_ether_addr(mask->src)) {
2575 field_flags |= I40EVF_CLOUD_FIELD_IMAC;
2576 } else {
2577 dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n",
2578 mask->src);
2579 return I40E_ERR_CONFIG;
2580 }
2581 }
2582
2583 if (!is_zero_ether_addr(key->dst))
2584 if (is_valid_ether_addr(key->dst) ||
2585 is_multicast_ether_addr(key->dst)) {
2586 /* set the mask if a valid dst_mac address */
2587 for (i = 0; i < ETH_ALEN; i++)
2588 filter->f.mask.tcp_spec.dst_mac[i] |=
2589 0xff;
2590 ether_addr_copy(filter->f.data.tcp_spec.dst_mac,
2591 key->dst);
2592 }
2593
2594 if (!is_zero_ether_addr(key->src))
2595 if (is_valid_ether_addr(key->src) ||
2596 is_multicast_ether_addr(key->src)) {
2597 /* set the mask if a valid src_mac address */
2598 for (i = 0; i < ETH_ALEN; i++)
2599 filter->f.mask.tcp_spec.src_mac[i] |=
2600 0xff;
2601 ether_addr_copy(filter->f.data.tcp_spec.src_mac,
2602 key->src);
2603 }
2604 }
2605
2606 if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
2607 struct flow_dissector_key_vlan *key =
2608 skb_flow_dissector_target(f->dissector,
2609 FLOW_DISSECTOR_KEY_VLAN,
2610 f->key);
2611 struct flow_dissector_key_vlan *mask =
2612 skb_flow_dissector_target(f->dissector,
2613 FLOW_DISSECTOR_KEY_VLAN,
2614 f->mask);
2615
2616 if (mask->vlan_id) {
2617 if (mask->vlan_id == VLAN_VID_MASK) {
2618 field_flags |= I40EVF_CLOUD_FIELD_IVLAN;
2619 } else {
2620 dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n",
2621 mask->vlan_id);
2622 return I40E_ERR_CONFIG;
2623 }
2624 }
2625 filter->f.mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
2626 filter->f.data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
2627 }
2628
2629 if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
2630 struct flow_dissector_key_control *key =
2631 skb_flow_dissector_target(f->dissector,
2632 FLOW_DISSECTOR_KEY_CONTROL,
2633 f->key);
2634
2635 addr_type = key->addr_type;
2636 }
2637
2638 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2639 struct flow_dissector_key_ipv4_addrs *key =
2640 skb_flow_dissector_target(f->dissector,
2641 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
2642 f->key);
2643 struct flow_dissector_key_ipv4_addrs *mask =
2644 skb_flow_dissector_target(f->dissector,
2645 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
2646 f->mask);
2647
2648 if (mask->dst) {
2649 if (mask->dst == cpu_to_be32(0xffffffff)) {
2650 field_flags |= I40EVF_CLOUD_FIELD_IIP;
2651 } else {
2652 dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n",
2653 be32_to_cpu(mask->dst));
2654 return I40E_ERR_CONFIG;
2655 }
2656 }
2657
2658 if (mask->src) {
2659 if (mask->src == cpu_to_be32(0xffffffff)) {
2660 field_flags |= I40EVF_CLOUD_FIELD_IIP;
2661 } else {
2662 dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n",
2663 be32_to_cpu(mask->src));
2664 return I40E_ERR_CONFIG;
2665 }
2666 }
2667
2668 if (field_flags & I40EVF_CLOUD_FIELD_TEN_ID) {
2669 dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n");
2670 return I40E_ERR_CONFIG;
2671 }
2672 if (key->dst) {
2673 filter->f.mask.tcp_spec.dst_ip[0] |=
2674 cpu_to_be32(0xffffffff);
2675 filter->f.data.tcp_spec.dst_ip[0] = key->dst;
2676 }
2677 if (key->src) {
2678 filter->f.mask.tcp_spec.src_ip[0] |=
2679 cpu_to_be32(0xffffffff);
2680 filter->f.data.tcp_spec.src_ip[0] = key->src;
2681 }
2682 }
2683
2684 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2685 struct flow_dissector_key_ipv6_addrs *key =
2686 skb_flow_dissector_target(f->dissector,
2687 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
2688 f->key);
2689 struct flow_dissector_key_ipv6_addrs *mask =
2690 skb_flow_dissector_target(f->dissector,
2691 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
2692 f->mask);
2693
2694 /* validate mask, make sure it is not IPV6_ADDR_ANY */
2695 if (ipv6_addr_any(&mask->dst)) {
2696 dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n",
2697 IPV6_ADDR_ANY);
2698 return I40E_ERR_CONFIG;
2699 }
2700
2701 /* src and dest IPv6 addresses should not be the loopback
2702 * address (0:0:0:0:0:0:0:1), which can be represented as ::1
2703 */
2704 if (ipv6_addr_loopback(&key->dst) ||
2705 ipv6_addr_loopback(&key->src)) {
2706 dev_err(&adapter->pdev->dev,
2707 "ipv6 addr should not be loopback\n");
2708 return I40E_ERR_CONFIG;
2709 }
2710 if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
2711 field_flags |= I40EVF_CLOUD_FIELD_IIP;
2712
2713 if (!ipv6_addr_any(&key->dst)) {
2714 for (i = 0; i < 4; i++)
2715 filter->f.mask.tcp_spec.dst_ip[i] |=
2716 cpu_to_be32(0xffffffff);
2717 memcpy(&filter->f.data.tcp_spec.dst_ip,
2718 &key->dst.s6_addr32,
2719 sizeof(filter->f.data.tcp_spec.dst_ip));
2720 }
2721 if (!ipv6_addr_any(&key->src)) {
2722 for (i = 0; i < 4; i++)
2723 filter->f.mask.tcp_spec.src_ip[i] |=
2724 cpu_to_be32(0xffffffff);
2725 memcpy(&filter->f.data.tcp_spec.src_ip,
2726 &key->src.s6_addr32,
2727 sizeof(filter->f.data.tcp_spec.src_ip));
2728 }
2729 }
2730 if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
2731 struct flow_dissector_key_ports *key =
2732 skb_flow_dissector_target(f->dissector,
2733 FLOW_DISSECTOR_KEY_PORTS,
2734 f->key);
2735 struct flow_dissector_key_ports *mask =
2736 skb_flow_dissector_target(f->dissector,
2737 FLOW_DISSECTOR_KEY_PORTS,
2738 f->mask);
2739
2740 if (mask->src) {
2741 if (mask->src == cpu_to_be16(0xffff)) {
2742 field_flags |= I40EVF_CLOUD_FIELD_IIP;
2743 } else {
2744 dev_err(&adapter->pdev->dev, "Bad src port mask %u\n",
2745 be16_to_cpu(mask->src));
2746 return I40E_ERR_CONFIG;
2747 }
2748 }
2749
2750 if (mask->dst) {
2751 if (mask->dst == cpu_to_be16(0xffff)) {
2752 field_flags |= I40EVF_CLOUD_FIELD_IIP;
2753 } else {
2754 dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n",
2755 be16_to_cpu(mask->dst));
2756 return I40E_ERR_CONFIG;
2757 }
2758 }
2759 if (key->dst) {
2760 filter->f.mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
2761 filter->f.data.tcp_spec.dst_port = key->dst;
2762 }
2763
2764 if (key->src) {
2765 filter->f.mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
2766 filter->f.data.tcp_spec.src_port = key->src;
2767 }
2768 }
2769 filter->f.field_flags = field_flags;
2770
2771 return 0;
2772}
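
The parser above enforces an all-or-nothing mask policy: each flower field must be matched either exactly (an all-ones mask, which also sets the corresponding I40EVF_CLOUD_FIELD_* flag) or not at all; partial masks are rejected. A minimal sketch of that policy for a 16-bit field -- the helper name below is hypothetical, not part of this patch:

	/* Hypothetical helper illustrating the all-or-nothing mask policy
	 * enforced by i40evf_parse_cls_flower() above.
	 */
	static int i40evf_check_u16_mask(u16 mask, u8 *field_flags, u8 flag)
	{
		if (!mask)			/* field is not part of the match */
			return 0;
		if (mask == 0xffff) {		/* exact match: accept and flag it */
			*field_flags |= flag;
			return 0;
		}
		return -EOPNOTSUPP;		/* partial masks are not offloadable */
	}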
2773
2774/**
2775 * i40evf_handle_tclass - Forward to a traffic class on the device
2776 * @adapter: board private structure
2777 * @tc: traffic class index on the device
2778 * @filter: pointer to cloud filter structure
2779 */
2780static int i40evf_handle_tclass(struct i40evf_adapter *adapter, u32 tc,
2781 struct i40evf_cloud_filter *filter)
2782{
2783 if (tc == 0)
2784 return 0;
2785 if (tc < adapter->num_tc) {
2786 if (!filter->f.data.tcp_spec.dst_port) {
2787 dev_err(&adapter->pdev->dev,
2788 "Specify destination port to redirect to traffic class other than TC0\n");
2789 return -EINVAL;
2790 }
2791 }
2792 /* redirect to a traffic class on the same device */
2793 filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT;
2794 filter->f.action_meta = tc;
2795 return 0;
2796}
2797
2798/**
2799 * i40evf_configure_clsflower - Add tc flower filters
2800 * @adapter: board private structure
2801 * @cls_flower: Pointer to struct tc_cls_flower_offload
2802 */
2803static int i40evf_configure_clsflower(struct i40evf_adapter *adapter,
2804 struct tc_cls_flower_offload *cls_flower)
2805{
2806 int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
2807 struct i40evf_cloud_filter *filter = NULL;
2808 int err = 0, count = 50;
2809
2810 while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
2811 &adapter->crit_section)) {
2812 udelay(1);
2813 if (--count == 0)
2814 return -EINVAL;
2815 }
2816
2817 if (tc < 0) {
2818 dev_err(&adapter->pdev->dev, "Invalid traffic class\n");
2819 err = -EINVAL;
2820 goto clearout;
2821 }
2821
2822 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
2823 if (!filter) {
2824 err = -ENOMEM;
2825 goto clearout;
2826 }
2827 filter->cookie = cls_flower->cookie;
2828
2829 /* set the mask to all zeroes to begin with */
2830 memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec));
2831 /* start out with flow type and eth type IPv4 to begin with */
2832 filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW;
2833 err = i40evf_parse_cls_flower(adapter, cls_flower, filter);
2834 if (err)
2835 goto err;
2836
2837 err = i40evf_handle_tclass(adapter, tc, filter);
2838 if (err)
2839 goto err;
2840
2841 /* add filter to the list */
2842 spin_lock_bh(&adapter->cloud_filter_list_lock);
2843 list_add_tail(&filter->list, &adapter->cloud_filter_list);
2844 adapter->num_cloud_filters++;
2845 filter->add = true;
2846 adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
2847 spin_unlock_bh(&adapter->cloud_filter_list_lock);
2848err:
2849 if (err)
2850 kfree(filter);
2851clearout:
2852 clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
2853 return err;
2854}
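
Entry into this path is serialized with a bounded spin on the adapter's shared critical-section bit. Condensed into a standalone sketch (hypothetical helper, same logic as the loop above):

	/* Hypothetical helper: bounded wait for the shared critical-section
	 * bit; the caller must clear_bit() when its work is done.
	 */
	static int i40evf_get_crit_section(struct i40evf_adapter *adapter)
	{
		int count = 50;

		while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
					&adapter->crit_section)) {
			if (--count == 0)
				return -EBUSY;	/* give up after ~50 iterations */
			udelay(1);
		}
		return 0;
	}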
2855
2856/**
 * i40evf_find_cf - Find the cloud filter in the list
2857 * @adapter: Board private structure
2858 * @cookie: filter specific cookie
2859 *
2860 * Returns ptr to the filter object or NULL. Must be called while holding the
2861 * cloud_filter_list_lock.
2862 */
2863static struct i40evf_cloud_filter *i40evf_find_cf(struct i40evf_adapter *adapter,
2864 unsigned long *cookie)
2865{
2866 struct i40evf_cloud_filter *filter = NULL;
2867
2868 if (!cookie)
2869 return NULL;
2870
2871 list_for_each_entry(filter, &adapter->cloud_filter_list, list) {
2872 if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
2873 return filter;
2874 }
2875 return NULL;
2876}
2877
2878/**
2879 * i40evf_delete_clsflower - Remove tc flower filters
2880 * @adapter: board private structure
2881 * @cls_flower: Pointer to struct tc_cls_flower_offload
2882 */
2883static int i40evf_delete_clsflower(struct i40evf_adapter *adapter,
2884 struct tc_cls_flower_offload *cls_flower)
2885{
2886 struct i40evf_cloud_filter *filter = NULL;
2887 int err = 0;
2888
2889 spin_lock_bh(&adapter->cloud_filter_list_lock);
2890 filter = i40evf_find_cf(adapter, &cls_flower->cookie);
2891 if (filter) {
2892 filter->del = true;
2893 adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
2894 } else {
2895 err = -EINVAL;
2896 }
2897 spin_unlock_bh(&adapter->cloud_filter_list_lock);
2898
2899 return err;
2900}
2901
2902/**
2903 * i40evf_setup_tc_cls_flower - flower classifier offloads
2904 * @adapter: board private structure
2905 * @cls_flower: Pointer to struct tc_cls_flower_offload
2906 */
2907static int i40evf_setup_tc_cls_flower(struct i40evf_adapter *adapter,
2908 struct tc_cls_flower_offload *cls_flower)
2909{
2910 if (cls_flower->common.chain_index)
2911 return -EOPNOTSUPP;
2912
2913 switch (cls_flower->command) {
2914 case TC_CLSFLOWER_REPLACE:
2915 return i40evf_configure_clsflower(adapter, cls_flower);
2916 case TC_CLSFLOWER_DESTROY:
2917 return i40evf_delete_clsflower(adapter, cls_flower);
2918 case TC_CLSFLOWER_STATS:
2919 return -EOPNOTSUPP;
2920 default:
2921 return -EINVAL;
2922 }
2923}
2924
2925/**
2926 * i40evf_setup_tc_block_cb - block callback for tc
2927 * @type: type of offload
2928 * @type_data: offload data
2929 * @cb_priv: adapter private structure passed at block callback registration
2930 *
2931 * This function is the tc block callback for flower classifier offloads
2932 **/
2933static int i40evf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
2934 void *cb_priv)
2935{
2936 switch (type) {
2937 case TC_SETUP_CLSFLOWER:
2938 return i40evf_setup_tc_cls_flower(cb_priv, type_data);
2939 default:
2940 return -EOPNOTSUPP;
2941 }
2942}
2943
2944/**
2945 * i40evf_setup_tc_block - register callbacks for tc
2946 * @dev: network interface device structure
2947 * @f: tc offload data
2948 *
2949 * This function registers block callbacks for tc
2950 * offloads
2951 **/
2952static int i40evf_setup_tc_block(struct net_device *dev,
2953 struct tc_block_offload *f)
2954{
2955 struct i40evf_adapter *adapter = netdev_priv(dev);
2956
2957 if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
2958 return -EOPNOTSUPP;
2959
2960 switch (f->command) {
2961 case TC_BLOCK_BIND:
2962 return tcf_block_cb_register(f->block, i40evf_setup_tc_block_cb,
2963 adapter, adapter);
2964 case TC_BLOCK_UNBIND:
2965 tcf_block_cb_unregister(f->block, i40evf_setup_tc_block_cb,
2966 adapter);
2967 return 0;
2968 default:
2969 return -EOPNOTSUPP;
2970 }
2971}
2972
2973/**
2974 * i40evf_setup_tc - configure multiple traffic classes
2975 * @netdev: network interface device structure
2976 * @type: type of offload
2977 * @type_data: tc offload data
2978 *
2979 * This function is the callback to ndo_setup_tc in the
2980 * netdev_ops.
2981 *
2982 * Returns 0 on success
2983 **/
2984static int i40evf_setup_tc(struct net_device *netdev, enum tc_setup_type type,
2985 void *type_data)
2986{
2987 switch (type) {
2988 case TC_SETUP_QDISC_MQPRIO:
2989 return __i40evf_setup_tc(netdev, type_data);
2990 case TC_SETUP_BLOCK:
2991 return i40evf_setup_tc_block(netdev, type_data);
2992 default:
2993 return -EOPNOTSUPP;
2994 }
2995}
2996
2997/**
2194 * i40evf_open - Called when a network interface is made active 2998 * i40evf_open - Called when a network interface is made active
2195 * @netdev: network interface device structure 2999 * @netdev: network interface device structure
2196 * 3000 *
@@ -2457,6 +3261,7 @@ static const struct net_device_ops i40evf_netdev_ops = {
2457#ifdef CONFIG_NET_POLL_CONTROLLER 3261#ifdef CONFIG_NET_POLL_CONTROLLER
2458 .ndo_poll_controller = i40evf_netpoll, 3262 .ndo_poll_controller = i40evf_netpoll,
2459#endif 3263#endif
3264 .ndo_setup_tc = i40evf_setup_tc,
2460}; 3265};
2461 3266
2462/** 3267/**
@@ -2571,6 +3376,9 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
2571 if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) 3376 if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
2572 hw_features |= (NETIF_F_HW_VLAN_CTAG_TX | 3377 hw_features |= (NETIF_F_HW_VLAN_CTAG_TX |
2573 NETIF_F_HW_VLAN_CTAG_RX); 3378 NETIF_F_HW_VLAN_CTAG_RX);
3379 /* Enable cloud filter if ADQ is supported */
3380 if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)
3381 hw_features |= NETIF_F_HW_TC;
2574 3382
2575 netdev->hw_features |= hw_features; 3383 netdev->hw_features |= hw_features;
2576 3384
@@ -2938,9 +3746,11 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2938 mutex_init(&hw->aq.arq_mutex); 3746 mutex_init(&hw->aq.arq_mutex);
2939 3747
2940 spin_lock_init(&adapter->mac_vlan_list_lock); 3748 spin_lock_init(&adapter->mac_vlan_list_lock);
3749 spin_lock_init(&adapter->cloud_filter_list_lock);
2941 3750
2942 INIT_LIST_HEAD(&adapter->mac_filter_list); 3751 INIT_LIST_HEAD(&adapter->mac_filter_list);
2943 INIT_LIST_HEAD(&adapter->vlan_filter_list); 3752 INIT_LIST_HEAD(&adapter->vlan_filter_list);
3753 INIT_LIST_HEAD(&adapter->cloud_filter_list);
2944 3754
2945 INIT_WORK(&adapter->reset_task, i40evf_reset_task); 3755 INIT_WORK(&adapter->reset_task, i40evf_reset_task);
2946 INIT_WORK(&adapter->adminq_task, i40evf_adminq_task); 3756 INIT_WORK(&adapter->adminq_task, i40evf_adminq_task);
@@ -3065,7 +3875,9 @@ static void i40evf_remove(struct pci_dev *pdev)
3065{ 3875{
3066 struct net_device *netdev = pci_get_drvdata(pdev); 3876 struct net_device *netdev = pci_get_drvdata(pdev);
3067 struct i40evf_adapter *adapter = netdev_priv(netdev); 3877 struct i40evf_adapter *adapter = netdev_priv(netdev);
3878 struct i40evf_vlan_filter *vlf, *vlftmp;
3068 struct i40evf_mac_filter *f, *ftmp; 3879 struct i40evf_mac_filter *f, *ftmp;
3880 struct i40evf_cloud_filter *cf, *cftmp;
3069 struct i40e_hw *hw = &adapter->hw; 3881 struct i40e_hw *hw = &adapter->hw;
3070 int err; 3882 int err;
3071 /* Indicate we are in remove and not to run reset_task */ 3883 /* Indicate we are in remove and not to run reset_task */
@@ -3087,6 +3899,7 @@ static void i40evf_remove(struct pci_dev *pdev)
3087 /* Shut down all the garbage mashers on the detention level */ 3899 /* Shut down all the garbage mashers on the detention level */
3088 adapter->state = __I40EVF_REMOVE; 3900 adapter->state = __I40EVF_REMOVE;
3089 adapter->aq_required = 0; 3901 adapter->aq_required = 0;
3902 adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
3090 i40evf_request_reset(adapter); 3903 i40evf_request_reset(adapter);
3091 msleep(50); 3904 msleep(50);
3092 /* If the FW isn't responding, kick it once, but only once. */ 3905 /* If the FW isn't responding, kick it once, but only once. */
@@ -3127,13 +3940,21 @@ static void i40evf_remove(struct pci_dev *pdev)
3127 list_del(&f->list); 3940 list_del(&f->list);
3128 kfree(f); 3941 kfree(f);
3129 } 3942 }
3130 list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { 3943 list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
3131 list_del(&f->list); 3944 list) {
3132 kfree(f); 3945 list_del(&vlf->list);
3946 kfree(vlf);
3133 } 3947 }
3134 3948
3135 spin_unlock_bh(&adapter->mac_vlan_list_lock); 3949 spin_unlock_bh(&adapter->mac_vlan_list_lock);
3136 3950
3951 spin_lock_bh(&adapter->cloud_filter_list_lock);
3952 list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
3953 list_del(&cf->list);
3954 kfree(cf);
3955 }
3956 spin_unlock_bh(&adapter->cloud_filter_list_lock);
3957
3137 free_netdev(netdev); 3958 free_netdev(netdev);
3138 3959
3139 pci_disable_pcie_error_reporting(pdev); 3960 pci_disable_pcie_error_reporting(pdev);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 50ce0d6c09ef..6134b61e0938 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -161,7 +161,8 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter)
161 VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 | 161 VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 |
162 VIRTCHNL_VF_OFFLOAD_ENCAP | 162 VIRTCHNL_VF_OFFLOAD_ENCAP |
163 VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | 163 VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
164 VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; 164 VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
165 VIRTCHNL_VF_OFFLOAD_ADQ;
165 166
166 adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES; 167 adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
167 adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG; 168 adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG;
@@ -344,6 +345,7 @@ void i40evf_disable_queues(struct i40evf_adapter *adapter)
344void i40evf_map_queues(struct i40evf_adapter *adapter) 345void i40evf_map_queues(struct i40evf_adapter *adapter)
345{ 346{
346 struct virtchnl_irq_map_info *vimi; 347 struct virtchnl_irq_map_info *vimi;
348 struct virtchnl_vector_map *vecmap;
347 int v_idx, q_vectors, len; 349 int v_idx, q_vectors, len;
348 struct i40e_q_vector *q_vector; 350 struct i40e_q_vector *q_vector;
349 351
@@ -367,17 +369,22 @@ void i40evf_map_queues(struct i40evf_adapter *adapter)
367 vimi->num_vectors = adapter->num_msix_vectors; 369 vimi->num_vectors = adapter->num_msix_vectors;
368 /* Queue vectors first */ 370 /* Queue vectors first */
369 for (v_idx = 0; v_idx < q_vectors; v_idx++) { 371 for (v_idx = 0; v_idx < q_vectors; v_idx++) {
370 q_vector = adapter->q_vectors + v_idx; 372 q_vector = &adapter->q_vectors[v_idx];
371 vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id; 373 vecmap = &vimi->vecmap[v_idx];
372 vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS; 374
373 vimi->vecmap[v_idx].txq_map = q_vector->ring_mask; 375 vecmap->vsi_id = adapter->vsi_res->vsi_id;
374 vimi->vecmap[v_idx].rxq_map = q_vector->ring_mask; 376 vecmap->vector_id = v_idx + NONQ_VECS;
377 vecmap->txq_map = q_vector->ring_mask;
378 vecmap->rxq_map = q_vector->ring_mask;
379 vecmap->rxitr_idx = I40E_RX_ITR;
380 vecmap->txitr_idx = I40E_TX_ITR;
375 } 381 }
376 /* Misc vector last - this is only for AdminQ messages */ 382 /* Misc vector last - this is only for AdminQ messages */
377 vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id; 383 vecmap = &vimi->vecmap[v_idx];
378 vimi->vecmap[v_idx].vector_id = 0; 384 vecmap->vsi_id = adapter->vsi_res->vsi_id;
379 vimi->vecmap[v_idx].txq_map = 0; 385 vecmap->vector_id = 0;
380 vimi->vecmap[v_idx].rxq_map = 0; 386 vecmap->txq_map = 0;
387 vecmap->rxq_map = 0;
381 388
382 adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS; 389 adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS;
383 i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP, 390 i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP,
@@ -459,7 +466,7 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
459 more = true; 466 more = true;
460 } 467 }
461 468
462 veal = kzalloc(len, GFP_KERNEL); 469 veal = kzalloc(len, GFP_ATOMIC);
463 if (!veal) { 470 if (!veal) {
464 spin_unlock_bh(&adapter->mac_vlan_list_lock); 471 spin_unlock_bh(&adapter->mac_vlan_list_lock);
465 return; 472 return;
@@ -532,7 +539,7 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
532 (count * sizeof(struct virtchnl_ether_addr)); 539 (count * sizeof(struct virtchnl_ether_addr));
533 more = true; 540 more = true;
534 } 541 }
535 veal = kzalloc(len, GFP_KERNEL); 542 veal = kzalloc(len, GFP_ATOMIC);
536 if (!veal) { 543 if (!veal) {
537 spin_unlock_bh(&adapter->mac_vlan_list_lock); 544 spin_unlock_bh(&adapter->mac_vlan_list_lock);
538 return; 545 return;
@@ -606,7 +613,7 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
606 (count * sizeof(u16)); 613 (count * sizeof(u16));
607 more = true; 614 more = true;
608 } 615 }
609 vvfl = kzalloc(len, GFP_KERNEL); 616 vvfl = kzalloc(len, GFP_ATOMIC);
610 if (!vvfl) { 617 if (!vvfl) {
611 spin_unlock_bh(&adapter->mac_vlan_list_lock); 618 spin_unlock_bh(&adapter->mac_vlan_list_lock);
612 return; 619 return;
@@ -678,7 +685,7 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter)
678 (count * sizeof(u16)); 685 (count * sizeof(u16));
679 more = true; 686 more = true;
680 } 687 }
681 vvfl = kzalloc(len, GFP_KERNEL); 688 vvfl = kzalloc(len, GFP_ATOMIC);
682 if (!vvfl) { 689 if (!vvfl) {
683 spin_unlock_bh(&adapter->mac_vlan_list_lock); 690 spin_unlock_bh(&adapter->mac_vlan_list_lock);
684 return; 691 return;
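
The four GFP_KERNEL to GFP_ATOMIC conversions above share one motivation: each allocation now happens while mac_vlan_list_lock is held via spin_lock_bh(), and a sleeping allocation under a spinlock can deadlock or trip the scheduling-while-atomic check. The rule in isolation (hypothetical helper, not from this patch):

	/* Hypothetical illustration: memory requested while holding a
	 * BH-disabling spinlock must use a non-sleeping GFP flag.
	 */
	static void *i40evf_alloc_under_lock(spinlock_t *lock, size_t len)
	{
		void *buf;

		spin_lock_bh(lock);
		buf = kzalloc(len, GFP_ATOMIC);	/* never sleeps; may return NULL */
		spin_unlock_bh(lock);
		return buf;
	}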
@@ -967,6 +974,201 @@ static void i40evf_print_link_message(struct i40evf_adapter *adapter)
967} 974}
968 975
969/** 976/**
977 * i40evf_enable_channels
978 * @adapter: adapter structure
979 *
980 * Request that the PF enable channels as specified by
981 * the user via the tc tool.
982 **/
983void i40evf_enable_channels(struct i40evf_adapter *adapter)
984{
985 struct virtchnl_tc_info *vti = NULL;
986 u16 len;
987 int i;
988
989 if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
990 /* bail because we already have a command pending */
991 dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
992 adapter->current_op);
993 return;
994 }
995
996 len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) +
997 sizeof(struct virtchnl_tc_info);
998
999 vti = kzalloc(len, GFP_KERNEL);
1000 if (!vti)
1001 return;
1002 vti->num_tc = adapter->num_tc;
1003 for (i = 0; i < vti->num_tc; i++) {
1004 vti->list[i].count = adapter->ch_config.ch_info[i].count;
1005 vti->list[i].offset = adapter->ch_config.ch_info[i].offset;
1006 vti->list[i].pad = 0;
1007 vti->list[i].max_tx_rate =
1008 adapter->ch_config.ch_info[i].max_tx_rate;
1009 }
1010
1011 adapter->ch_config.state = __I40EVF_TC_RUNNING;
1012 adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
1013 adapter->current_op = VIRTCHNL_OP_ENABLE_CHANNELS;
1014 adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_CHANNELS;
1015 i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS,
1016 (u8 *)vti, len);
1017 kfree(vti);
1018}
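
i40evf_enable_channels() builds a variable-length message: a fixed virtchnl_tc_info header followed by one virtchnl_channel_info entry per traffic class, sized accordingly and sent to the PF as a flat byte buffer. The same pattern in a self-contained sketch -- the message types below are made up for illustration, assuming the real virtchnl layout ends in a flexible array:

	struct chan_elem { u16 count; u16 offset; };	/* stand-in element */
	struct chan_msg {				/* stand-in header */
		u32 num;
		struct chan_elem list[];		/* flexible array member */
	};

	static struct chan_msg *build_chan_msg(u32 n, u16 *out_len)
	{
		u16 len = sizeof(struct chan_msg) + n * sizeof(struct chan_elem);
		struct chan_msg *m = kzalloc(len, GFP_KERNEL);

		if (m)
			m->num = n;
		*out_len = len;		/* caller sends (u8 *)m with length len */
		return m;
	}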
1019
1020/**
1021 * i40evf_disable_channels
1022 * @adapter: adapter structure
1023 *
1024 * Request that the PF disable channels that are configured
1025 **/
1026void i40evf_disable_channels(struct i40evf_adapter *adapter)
1027{
1028 if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
1029 /* bail because we already have a command pending */
1030 dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
1031 adapter->current_op);
1032 return;
1033 }
1034
1035 adapter->ch_config.state = __I40EVF_TC_INVALID;
1036 adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
1037 adapter->current_op = VIRTCHNL_OP_DISABLE_CHANNELS;
1038 adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_CHANNELS;
1039 i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS,
1040 NULL, 0);
1041}
1042
1043/**
1044 * i40evf_print_cloud_filter
1045 * @adapter: adapter structure
1046 * @f: cloud filter to print
1047 *
1048 * Print the cloud filter
1049 **/
1050static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter,
1051 struct virtchnl_filter f)
1052{
1053 switch (f.flow_type) {
1054 case VIRTCHNL_TCP_V4_FLOW:
1055 dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
1056 &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
1057 ntohs(f.data.tcp_spec.vlan_id),
1058 &f.data.tcp_spec.dst_ip[0], &f.data.tcp_spec.src_ip[0],
1059 ntohs(f.data.tcp_spec.dst_port),
1060 ntohs(f.data.tcp_spec.src_port));
1061 break;
1062 case VIRTCHNL_TCP_V6_FLOW:
1063 dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
1064 &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
1065 ntohs(f.data.tcp_spec.vlan_id),
1066 &f.data.tcp_spec.dst_ip, &f.data.tcp_spec.src_ip,
1067 ntohs(f.data.tcp_spec.dst_port),
1068 ntohs(f.data.tcp_spec.src_port));
1069 break;
1070 }
1071}
1072
1073/**
1074 * i40evf_add_cloud_filter
1075 * @adapter: adapter structure
1076 *
1077 * Request that the PF add cloud filters as specified
1078 * by the user via the tc tool.
1079 **/
1080void i40evf_add_cloud_filter(struct i40evf_adapter *adapter)
1081{
1082 struct i40evf_cloud_filter *cf;
1083 struct virtchnl_filter *f;
1084 int len = 0, count = 0;
1085
1086 if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
1087 /* bail because we already have a command pending */
1088 dev_err(&adapter->pdev->dev, "Cannot add cloud filter, command %d pending\n",
1089 adapter->current_op);
1090 return;
1091 }
1092 list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
1093 if (cf->add) {
1094 count++;
1095 break;
1096 }
1097 }
1098 if (!count) {
1099 adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
1100 return;
1101 }
1102 len = sizeof(struct virtchnl_filter);
1103 f = kzalloc(len, GFP_KERNEL);
1104 if (!f)
1105 return;
1106
1107 adapter->current_op = VIRTCHNL_OP_ADD_CLOUD_FILTER;
1108
1109 list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
1110 if (cf->add) {
1111 memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
1112 cf->add = false;
1113 cf->state = __I40EVF_CF_ADD_PENDING;
1114 i40evf_send_pf_msg(adapter,
1115 VIRTCHNL_OP_ADD_CLOUD_FILTER,
1116 (u8 *)f, len);
1117 }
1118 }
1119 kfree(f);
1120}
1121
1122/**
1123 * i40evf_del_cloud_filter
1124 * @adapter: adapter structure
1125 *
1126 * Request that the PF delete cloud filters as specified
1127 * by the user via the tc tool.
1128 **/
1129void i40evf_del_cloud_filter(struct i40evf_adapter *adapter)
1130{
1131 struct i40evf_cloud_filter *cf, *cftmp;
1132 struct virtchnl_filter *f;
1133 int len = 0, count = 0;
1134
1135 if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
1136 /* bail because we already have a command pending */
1137 dev_err(&adapter->pdev->dev, "Cannot remove cloud filter, command %d pending\n",
1138 adapter->current_op);
1139 return;
1140 }
1141 list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
1142 if (cf->del) {
1143 count++;
1144 break;
1145 }
1146 }
1147 if (!count) {
1148 adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
1149 return;
1150 }
1151 len = sizeof(struct virtchnl_filter);
1152 f = kzalloc(len, GFP_KERNEL);
1153 if (!f)
1154 return;
1155
1156 adapter->current_op = VIRTCHNL_OP_DEL_CLOUD_FILTER;
1157
1158 list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
1159 if (cf->del) {
1160 memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
1161 cf->del = false;
1162 cf->state = __I40EVF_CF_DEL_PENDING;
1163 i40evf_send_pf_msg(adapter,
1164 VIRTCHNL_OP_DEL_CLOUD_FILTER,
1165 (u8 *)f, len);
1166 }
1167 }
1168 kfree(f);
1169}
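
Taken together, the add/del request paths here and the completion handlers further down drive a small per-filter state machine. Condensed view of the states introduced by this patch -- the enum below is an illustrative restatement, not the driver's own definition:

	/* Illustrative restatement of the cloud-filter lifecycle. */
	enum cf_state_sketch {
		CF_ADD_PENDING,	/* VIRTCHNL_OP_ADD_CLOUD_FILTER sent, awaiting PF */
		CF_ACTIVE,	/* PF acknowledged the add */
		CF_DEL_PENDING,	/* VIRTCHNL_OP_DEL_CLOUD_FILTER sent, awaiting PF */
		CF_INVALID,	/* PF rejected the filter; entry is removed and freed */
	};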
1170
1171/**
970 * i40evf_request_reset 1172 * i40evf_request_reset
971 * @adapter: adapter structure 1173 * @adapter: adapter structure
972 * 1174 *
@@ -1011,14 +1213,25 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
1011 if (adapter->link_up == link_up) 1213 if (adapter->link_up == link_up)
1012 break; 1214 break;
1013 1215
1014 /* If we get link up message and start queues before 1216 if (link_up) {
1015 * our queues are configured it will trigger a TX hang. 1217 /* If we get link up message and start queues
1016 * In that case, just ignore the link status message, 1218 * before our queues are configured it will
1017 * we'll get another one after we enable queues and 1219 * trigger a TX hang. In that case, just ignore
1018 * actually prepared to send traffic. 1220 * the link status message, we'll get another one
1019 */ 1221 * after we enable queues and are actually
1020 if (link_up && adapter->state != __I40EVF_RUNNING) 1222 * prepared to send traffic.
1021 break; 1223 */
1224 if (adapter->state != __I40EVF_RUNNING)
1225 break;
1226
1227 /* For ADq enabled VF, we reconfigure VSIs and
1228 * re-allocate queues. Hence wait till all
1229 * queues are enabled.
1230 */
1231 if (adapter->flags &
1232 I40EVF_FLAG_QUEUES_DISABLED)
1233 break;
1234 }
1022 1235
1023 adapter->link_up = link_up; 1236 adapter->link_up = link_up;
1024 if (link_up) { 1237 if (link_up) {
@@ -1031,7 +1244,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
1031 i40evf_print_link_message(adapter); 1244 i40evf_print_link_message(adapter);
1032 break; 1245 break;
1033 case VIRTCHNL_EVENT_RESET_IMPENDING: 1246 case VIRTCHNL_EVENT_RESET_IMPENDING:
1034 dev_info(&adapter->pdev->dev, "PF reset warning received\n"); 1247 dev_info(&adapter->pdev->dev, "Reset warning received from the PF\n");
1035 if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) { 1248 if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
1036 adapter->flags |= I40EVF_FLAG_RESET_PENDING; 1249 adapter->flags |= I40EVF_FLAG_RESET_PENDING;
1037 dev_info(&adapter->pdev->dev, "Scheduling reset task\n"); 1250 dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
@@ -1063,6 +1276,57 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
1063 dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n", 1276 dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n",
1064 i40evf_stat_str(&adapter->hw, v_retval)); 1277 i40evf_stat_str(&adapter->hw, v_retval));
1065 break; 1278 break;
1279 case VIRTCHNL_OP_ENABLE_CHANNELS:
1280 dev_err(&adapter->pdev->dev, "Failed to configure queue channels, error %s\n",
1281 i40evf_stat_str(&adapter->hw, v_retval));
1282 adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
1283 adapter->ch_config.state = __I40EVF_TC_INVALID;
1284 netdev_reset_tc(netdev);
1285 netif_tx_start_all_queues(netdev);
1286 break;
1287 case VIRTCHNL_OP_DISABLE_CHANNELS:
1288 dev_err(&adapter->pdev->dev, "Failed to disable queue channels, error %s\n",
1289 i40evf_stat_str(&adapter->hw, v_retval));
1290 adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
1291 adapter->ch_config.state = __I40EVF_TC_RUNNING;
1292 netif_tx_start_all_queues(netdev);
1293 break;
1294 case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
1295 struct i40evf_cloud_filter *cf, *cftmp;
1296
1297 list_for_each_entry_safe(cf, cftmp,
1298 &adapter->cloud_filter_list,
1299 list) {
1300 if (cf->state == __I40EVF_CF_ADD_PENDING) {
1301 cf->state = __I40EVF_CF_INVALID;
1302 dev_info(&adapter->pdev->dev, "Failed to add cloud filter, error %s\n",
1303 i40evf_stat_str(&adapter->hw,
1304 v_retval));
1305 i40evf_print_cloud_filter(adapter,
1306 cf->f);
1307 list_del(&cf->list);
1308 kfree(cf);
1309 adapter->num_cloud_filters--;
1310 }
1311 }
1312 }
1313 break;
1314 case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
1315 struct i40evf_cloud_filter *cf;
1316
1317 list_for_each_entry(cf, &adapter->cloud_filter_list,
1318 list) {
1319 if (cf->state == __I40EVF_CF_DEL_PENDING) {
1320 cf->state = __I40EVF_CF_ACTIVE;
1321 dev_info(&adapter->pdev->dev, "Failed to del cloud filter, error %s\n",
1322 i40evf_stat_str(&adapter->hw,
1323 v_retval));
1324 i40evf_print_cloud_filter(adapter,
1325 cf->f);
1326 }
1327 }
1328 }
1329 break;
1066 default: 1330 default:
1067 dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", 1331 dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
1068 v_retval, 1332 v_retval,
@@ -1102,6 +1366,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
1102 case VIRTCHNL_OP_ENABLE_QUEUES: 1366 case VIRTCHNL_OP_ENABLE_QUEUES:
1103 /* enable transmits */ 1367 /* enable transmits */
1104 i40evf_irq_enable(adapter, true); 1368 i40evf_irq_enable(adapter, true);
1369 adapter->flags &= ~I40EVF_FLAG_QUEUES_DISABLED;
1105 break; 1370 break;
1106 case VIRTCHNL_OP_DISABLE_QUEUES: 1371 case VIRTCHNL_OP_DISABLE_QUEUES:
1107 i40evf_free_all_tx_resources(adapter); 1372 i40evf_free_all_tx_resources(adapter);
@@ -1156,6 +1421,29 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
1156 } 1421 }
1157 } 1422 }
1158 break; 1423 break;
1424 case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
1425 struct i40evf_cloud_filter *cf;
1426
1427 list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
1428 if (cf->state == __I40EVF_CF_ADD_PENDING)
1429 cf->state = __I40EVF_CF_ACTIVE;
1430 }
1431 }
1432 break;
1433 case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
1434 struct i40evf_cloud_filter *cf, *cftmp;
1435
1436 list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
1437 list) {
1438 if (cf->state == __I40EVF_CF_DEL_PENDING) {
1439 cf->state = __I40EVF_CF_INVALID;
1440 list_del(&cf->list);
1441 kfree(cf);
1442 adapter->num_cloud_filters--;
1443 }
1444 }
1445 }
1446 break;
1159 default: 1447 default:
1160 if (adapter->current_op && (v_opcode != adapter->current_op)) 1448 if (adapter->current_op && (v_opcode != adapter->current_op))
1161 dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", 1449 dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index a1d7b88cf083..5a1668cdb461 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -7137,6 +7137,7 @@ static void mvpp2_set_rx_mode(struct net_device *dev)
7137 int id = port->id; 7137 int id = port->id;
7138 bool allmulti = dev->flags & IFF_ALLMULTI; 7138 bool allmulti = dev->flags & IFF_ALLMULTI;
7139 7139
7140retry:
7140 mvpp2_prs_mac_promisc_set(priv, id, dev->flags & IFF_PROMISC); 7141 mvpp2_prs_mac_promisc_set(priv, id, dev->flags & IFF_PROMISC);
7141 mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, allmulti); 7142 mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, allmulti);
7142 mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, allmulti); 7143 mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, allmulti);
@@ -7144,9 +7145,13 @@ static void mvpp2_set_rx_mode(struct net_device *dev)
7144 /* Remove all port->id's mcast entries */ 7145 /* Remove all port->id's mcast entries */
7145 mvpp2_prs_mcast_del_all(priv, id); 7146 mvpp2_prs_mcast_del_all(priv, id);
7146 7147
7147 if (allmulti && !netdev_mc_empty(dev)) { 7148 if (!allmulti) {
7148 netdev_for_each_mc_addr(ha, dev) 7149 netdev_for_each_mc_addr(ha, dev) {
7149 mvpp2_prs_mac_da_accept(priv, id, ha->addr, true); 7150 if (mvpp2_prs_mac_da_accept(priv, id, ha->addr, true)) {
7151 allmulti = true;
7152 goto retry;
7153 }
7154 }
7150 } 7155 }
7151} 7156}
7152 7157
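
The rewritten receive-mode logic above degrades gracefully: multicast addresses are accepted individually, and if programming any one of them into the parser fails, the port falls back to all-multicast and reprograms from the top via the new retry label. The control flow, restated as a standalone sketch using the same mvpp2 helpers (the wrapper itself is hypothetical):

	static void mvpp2_sketch_set_mc(struct mvpp2 *priv,
					struct net_device *dev, int id)
	{
		bool allmulti = dev->flags & IFF_ALLMULTI;
		struct netdev_hw_addr *ha;

	retry:
		mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, allmulti);
		mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, allmulti);
		mvpp2_prs_mcast_del_all(priv, id);

		if (allmulti)
			return;		/* hardware already accepts all multicast */

		netdev_for_each_mc_addr(ha, dev) {
			if (mvpp2_prs_mac_da_accept(priv, id, ha->addr, true)) {
				allmulti = true;	/* e.g. parser table full */
				goto retry;
			}
		}
	}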
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 9463c3fa254f..0cadcabfe86f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -20,7 +20,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
20 spectrum_cnt.o spectrum_fid.o \ 20 spectrum_cnt.o spectrum_fid.o \
21 spectrum_ipip.o spectrum_acl_flex_actions.o \ 21 spectrum_ipip.o spectrum_acl_flex_actions.o \
22 spectrum_mr.o spectrum_mr_tcam.o \ 22 spectrum_mr.o spectrum_mr_tcam.o \
23 spectrum_qdisc.o 23 spectrum_qdisc.o spectrum_span.o
24mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o 24mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
25mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o 25mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
26obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o 26obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 3dcc58d61506..bfde93910f82 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -71,6 +71,7 @@
71#include "spectrum_cnt.h" 71#include "spectrum_cnt.h"
72#include "spectrum_dpipe.h" 72#include "spectrum_dpipe.h"
73#include "spectrum_acl_flex_actions.h" 73#include "spectrum_acl_flex_actions.h"
74#include "spectrum_span.h"
74#include "../mlxfw/mlxfw.h" 75#include "../mlxfw/mlxfw.h"
75 76
76#define MLXSW_FWREV_MAJOR 13 77#define MLXSW_FWREV_MAJOR 13
@@ -487,327 +488,6 @@ static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp)
487 return 0; 488 return 0;
488} 489}
489 490
490static int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
491{
492 int i;
493
494 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
495 return -EIO;
496
497 mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
498 MAX_SPAN);
499 mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
500 sizeof(struct mlxsw_sp_span_entry),
501 GFP_KERNEL);
502 if (!mlxsw_sp->span.entries)
503 return -ENOMEM;
504
505 for (i = 0; i < mlxsw_sp->span.entries_count; i++)
506 INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list);
507
508 return 0;
509}
510
511static void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
512{
513 int i;
514
515 for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
516 struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
517
518 WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
519 }
520 kfree(mlxsw_sp->span.entries);
521}
522
523static struct mlxsw_sp_span_entry *
524mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
525{
526 struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
527 struct mlxsw_sp_span_entry *span_entry;
528 char mpat_pl[MLXSW_REG_MPAT_LEN];
529 u8 local_port = port->local_port;
530 int index;
531 int i;
532 int err;
533
534 /* find a free entry to use */
535 index = -1;
536 for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
537 if (!mlxsw_sp->span.entries[i].used) {
538 index = i;
539 span_entry = &mlxsw_sp->span.entries[i];
540 break;
541 }
542 }
543 if (index < 0)
544 return NULL;
545
546 /* create a new port analyzer entry for local_port */
547 mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true);
548 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
549 if (err)
550 return NULL;
551
552 span_entry->used = true;
553 span_entry->id = index;
554 span_entry->ref_count = 1;
555 span_entry->local_port = local_port;
556 return span_entry;
557}
558
559static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
560 struct mlxsw_sp_span_entry *span_entry)
561{
562 u8 local_port = span_entry->local_port;
563 char mpat_pl[MLXSW_REG_MPAT_LEN];
564 int pa_id = span_entry->id;
565
566 mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
567 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
568 span_entry->used = false;
569}
570
571struct mlxsw_sp_span_entry *
572mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
573{
574 int i;
575
576 for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
577 struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
578
579 if (curr->used && curr->local_port == local_port)
580 return curr;
581 }
582 return NULL;
583}
584
585static struct mlxsw_sp_span_entry
586*mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
587{
588 struct mlxsw_sp_span_entry *span_entry;
589
590 span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
591 port->local_port);
592 if (span_entry) {
593 /* Already exists, just take a reference */
594 span_entry->ref_count++;
595 return span_entry;
596 }
597
598 return mlxsw_sp_span_entry_create(port);
599}
600
601static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
602 struct mlxsw_sp_span_entry *span_entry)
603{
604 WARN_ON(!span_entry->ref_count);
605 if (--span_entry->ref_count == 0)
606 mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
607 return 0;
608}
609
610static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
611{
612 struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
613 struct mlxsw_sp_span_inspected_port *p;
614 int i;
615
616 for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
617 struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
618
619 list_for_each_entry(p, &curr->bound_ports_list, list)
620 if (p->local_port == port->local_port &&
621 p->type == MLXSW_SP_SPAN_EGRESS)
622 return true;
623 }
624
625 return false;
626}
627
628static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
629 int mtu)
630{
631 return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
632}
633
634static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
635{
636 struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
637 char sbib_pl[MLXSW_REG_SBIB_LEN];
638 int err;
639
640 /* If port is egress mirrored, the shared buffer size should be
641 * updated according to the mtu value
642 */
643 if (mlxsw_sp_span_is_egress_mirror(port)) {
644 u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
645
646 mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
647 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
648 if (err) {
649 netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
650 return err;
651 }
652 }
653
654 return 0;
655}
656
657static struct mlxsw_sp_span_inspected_port *
658mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port,
659 struct mlxsw_sp_span_entry *span_entry)
660{
661 struct mlxsw_sp_span_inspected_port *p;
662
663 list_for_each_entry(p, &span_entry->bound_ports_list, list)
664 if (port->local_port == p->local_port)
665 return p;
666 return NULL;
667}
668
669static int
670mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
671 struct mlxsw_sp_span_entry *span_entry,
672 enum mlxsw_sp_span_type type,
673 bool bind)
674{
675 struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
676 char mpar_pl[MLXSW_REG_MPAR_LEN];
677 int pa_id = span_entry->id;
678
679 /* bind the port to the SPAN entry */
680 mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
681 (enum mlxsw_reg_mpar_i_e) type, bind, pa_id);
682 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
683}
684
685static int
686mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
687 struct mlxsw_sp_span_entry *span_entry,
688 enum mlxsw_sp_span_type type,
689 bool bind)
690{
691 struct mlxsw_sp_span_inspected_port *inspected_port;
692 struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
693 char sbib_pl[MLXSW_REG_SBIB_LEN];
694 int err;
695
696 /* if it is an egress SPAN, bind a shared buffer to it */
697 if (type == MLXSW_SP_SPAN_EGRESS) {
698 u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
699 port->dev->mtu);
700
701 mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
702 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
703 if (err) {
704 netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
705 return err;
706 }
707 }
708
709 if (bind) {
710 err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
711 true);
712 if (err)
713 goto err_port_bind;
714 }
715
716 inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
717 if (!inspected_port) {
718 err = -ENOMEM;
719 goto err_inspected_port_alloc;
720 }
721 inspected_port->local_port = port->local_port;
722 inspected_port->type = type;
723 list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
724
725 return 0;
726
727err_inspected_port_alloc:
728 if (bind)
729 mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
730 false);
731err_port_bind:
732 if (type == MLXSW_SP_SPAN_EGRESS) {
733 mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
734 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
735 }
736 return err;
737}
738
739static void
740mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
741 struct mlxsw_sp_span_entry *span_entry,
742 enum mlxsw_sp_span_type type,
743 bool bind)
744{
745 struct mlxsw_sp_span_inspected_port *inspected_port;
746 struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
747 char sbib_pl[MLXSW_REG_SBIB_LEN];
748
749 inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry);
750 if (!inspected_port)
751 return;
752
753 if (bind)
754 mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
755 false);
756 /* remove the SBIB buffer if it was egress SPAN */
757 if (type == MLXSW_SP_SPAN_EGRESS) {
758 mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
759 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
760 }
761
762 mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
763
764 list_del(&inspected_port->list);
765 kfree(inspected_port);
766}
767
768int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
769 struct mlxsw_sp_port *to,
770 enum mlxsw_sp_span_type type, bool bind)
771{
772 struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
773 struct mlxsw_sp_span_entry *span_entry;
774 int err;
775
776 span_entry = mlxsw_sp_span_entry_get(to);
777 if (!span_entry)
778 return -ENOENT;
779
780 netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
781 span_entry->id);
782
783 err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
784 if (err)
785 goto err_port_bind;
786
787 return 0;
788
789err_port_bind:
790 mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
791 return err;
792}
793
794void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
795 enum mlxsw_sp_span_type type, bool bind)
796{
797 struct mlxsw_sp_span_entry *span_entry;
798
799 span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
800 destination_port);
801 if (!span_entry) {
802 netdev_err(from->dev, "no span entry found\n");
803 return;
804 }
805
806 netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
807 span_entry->id);
808 mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
809}
810
811static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port, 491static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
812 bool enable, u32 rate) 492 bool enable, u32 rate)
813{ 493{
@@ -4118,70 +3798,6 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
4118 .resource_query_enable = 1, 3798 .resource_query_enable = 1,
4119}; 3799};
4120 3800
4121static bool
4122mlxsw_sp_resource_kvd_granularity_validate(struct netlink_ext_ack *extack,
4123 u64 size)
4124{
4125 const struct mlxsw_config_profile *profile;
4126
4127 profile = &mlxsw_sp_config_profile;
4128 if (size % profile->kvd_hash_granularity) {
4129 NL_SET_ERR_MSG_MOD(extack, "resource set with wrong granularity");
4130 return false;
4131 }
4132 return true;
4133}
4134
4135static int
4136mlxsw_sp_resource_kvd_size_validate(struct devlink *devlink, u64 size,
4137 struct netlink_ext_ack *extack)
4138{
4139 NL_SET_ERR_MSG_MOD(extack, "kvd size cannot be changed");
4140 return -EINVAL;
4141}
4142
4143static int
4144mlxsw_sp_resource_kvd_linear_size_validate(struct devlink *devlink, u64 size,
4145 struct netlink_ext_ack *extack)
4146{
4147 if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
4148 return -EINVAL;
4149
4150 return 0;
4151}
4152
4153static int
4154mlxsw_sp_resource_kvd_hash_single_size_validate(struct devlink *devlink, u64 size,
4155 struct netlink_ext_ack *extack)
4156{
4157 struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
4158
4159 if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
4160 return -EINVAL;
4161
4162 if (size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE)) {
4163 NL_SET_ERR_MSG_MOD(extack, "hash single size is smaller than minimum");
4164 return -EINVAL;
4165 }
4166 return 0;
4167}
4168
4169static int
4170mlxsw_sp_resource_kvd_hash_double_size_validate(struct devlink *devlink, u64 size,
4171 struct netlink_ext_ack *extack)
4172{
4173 struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
4174
4175 if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
4176 return -EINVAL;
4177
4178 if (size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE)) {
4179 NL_SET_ERR_MSG_MOD(extack, "hash double size is smaller than minimum");
4180 return -EINVAL;
4181 }
4182 return 0;
4183}
4184
4185static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink) 3801static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
4186{ 3802{
4187 struct mlxsw_core *mlxsw_core = devlink_priv(devlink); 3803 struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
@@ -4190,23 +3806,10 @@ static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
4190 return mlxsw_sp_kvdl_occ_get(mlxsw_sp); 3806 return mlxsw_sp_kvdl_occ_get(mlxsw_sp);
4191} 3807}
4192 3808
4193static struct devlink_resource_ops mlxsw_sp_resource_kvd_ops = {
4194 .size_validate = mlxsw_sp_resource_kvd_size_validate,
4195};
4196
4197static struct devlink_resource_ops mlxsw_sp_resource_kvd_linear_ops = { 3809static struct devlink_resource_ops mlxsw_sp_resource_kvd_linear_ops = {
4198 .size_validate = mlxsw_sp_resource_kvd_linear_size_validate,
4199 .occ_get = mlxsw_sp_resource_kvd_linear_occ_get, 3810 .occ_get = mlxsw_sp_resource_kvd_linear_occ_get,
4200}; 3811};
4201 3812
4202static struct devlink_resource_ops mlxsw_sp_resource_kvd_hash_single_ops = {
4203 .size_validate = mlxsw_sp_resource_kvd_hash_single_size_validate,
4204};
4205
4206static struct devlink_resource_ops mlxsw_sp_resource_kvd_hash_double_ops = {
4207 .size_validate = mlxsw_sp_resource_kvd_hash_double_size_validate,
4208};
4209
4210static struct devlink_resource_size_params mlxsw_sp_kvd_size_params; 3813static struct devlink_resource_size_params mlxsw_sp_kvd_size_params;
4211static struct devlink_resource_size_params mlxsw_sp_linear_size_params; 3814static struct devlink_resource_size_params mlxsw_sp_linear_size_params;
4212static struct devlink_resource_size_params mlxsw_sp_hash_single_size_params; 3815static struct devlink_resource_size_params mlxsw_sp_hash_single_size_params;
@@ -4268,7 +3871,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
4268 MLXSW_SP_RESOURCE_KVD, 3871 MLXSW_SP_RESOURCE_KVD,
4269 DEVLINK_RESOURCE_ID_PARENT_TOP, 3872 DEVLINK_RESOURCE_ID_PARENT_TOP,
4270 &mlxsw_sp_kvd_size_params, 3873 &mlxsw_sp_kvd_size_params,
4271 &mlxsw_sp_resource_kvd_ops); 3874 NULL);
4272 if (err) 3875 if (err)
4273 return err; 3876 return err;
4274 3877
@@ -4282,6 +3885,10 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
4282 if (err) 3885 if (err)
4283 return err; 3886 return err;
4284 3887
3888 err = mlxsw_sp_kvdl_resources_register(devlink);
3889 if (err)
3890 return err;
3891
4285 double_size = kvd_size - linear_size; 3892 double_size = kvd_size - linear_size;
4286 double_size *= profile->kvd_hash_double_parts; 3893 double_size *= profile->kvd_hash_double_parts;
4287 double_size /= profile->kvd_hash_double_parts + 3894 double_size /= profile->kvd_hash_double_parts +
@@ -4292,7 +3899,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
4292 MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE, 3899 MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
4293 MLXSW_SP_RESOURCE_KVD, 3900 MLXSW_SP_RESOURCE_KVD,
4294 &mlxsw_sp_hash_double_size_params, 3901 &mlxsw_sp_hash_double_size_params,
4295 &mlxsw_sp_resource_kvd_hash_double_ops); 3902 NULL);
4296 if (err) 3903 if (err)
4297 return err; 3904 return err;
4298 3905
@@ -4302,7 +3909,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
4302 MLXSW_SP_RESOURCE_KVD_HASH_SINGLE, 3909 MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
4303 MLXSW_SP_RESOURCE_KVD, 3910 MLXSW_SP_RESOURCE_KVD,
4304 &mlxsw_sp_hash_single_size_params, 3911 &mlxsw_sp_hash_single_size_params,
4305 &mlxsw_sp_resource_kvd_hash_single_ops); 3912 NULL);
4306 if (err) 3913 if (err)
4307 return err; 3914 return err;
4308 3915
@@ -4556,13 +4163,11 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
4556 u16 lag_id; 4163 u16 lag_id;
4557 4164
4558 if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) { 4165 if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) {
4559 NL_SET_ERR_MSG(extack, 4166 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported LAG devices");
4560 "spectrum: Exceeded number of supported LAG devices");
4561 return false; 4167 return false;
4562 } 4168 }
4563 if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { 4169 if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
4564 NL_SET_ERR_MSG(extack, 4170 NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type");
4565 "spectrum: LAG device using unsupported Tx type");
4566 return false; 4171 return false;
4567 } 4172 }
4568 return true; 4173 return true;
@@ -4804,8 +4409,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
4804 !netif_is_lag_master(upper_dev) && 4409 !netif_is_lag_master(upper_dev) &&
4805 !netif_is_bridge_master(upper_dev) && 4410 !netif_is_bridge_master(upper_dev) &&
4806 !netif_is_ovs_master(upper_dev)) { 4411 !netif_is_ovs_master(upper_dev)) {
4807 NL_SET_ERR_MSG(extack, 4412 NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
4808 "spectrum: Unknown upper device type");
4809 return -EINVAL; 4413 return -EINVAL;
4810 } 4414 }
4811 if (!info->linking) 4415 if (!info->linking)
@@ -4814,8 +4418,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
4814 (!netif_is_bridge_master(upper_dev) || 4418 (!netif_is_bridge_master(upper_dev) ||
4815 !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, 4419 !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
4816 upper_dev))) { 4420 upper_dev))) {
4817 NL_SET_ERR_MSG(extack, 4421 NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
4818 "spectrum: Enslaving a port to a device that already has an upper device is not supported");
4819 return -EINVAL; 4422 return -EINVAL;
4820 } 4423 }
4821 if (netif_is_lag_master(upper_dev) && 4424 if (netif_is_lag_master(upper_dev) &&
@@ -4823,24 +4426,20 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
4823 info->upper_info, extack)) 4426 info->upper_info, extack))
4824 return -EINVAL; 4427 return -EINVAL;
4825 if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) { 4428 if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) {
4826 NL_SET_ERR_MSG(extack, 4429 NL_SET_ERR_MSG_MOD(extack, "Master device is a LAG master and this device has a VLAN");
4827 "spectrum: Master device is a LAG master and this device has a VLAN");
4828 return -EINVAL; 4430 return -EINVAL;
4829 } 4431 }
4830 if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) && 4432 if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) &&
4831 !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) { 4433 !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) {
4832 NL_SET_ERR_MSG(extack, 4434 NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port");
4833 "spectrum: Can not put a VLAN on a LAG port");
4834 return -EINVAL; 4435 return -EINVAL;
4835 } 4436 }
4836 if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) { 4437 if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) {
4837 NL_SET_ERR_MSG(extack, 4438 NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN");
4838 "spectrum: Master device is an OVS master and this device has a VLAN");
4839 return -EINVAL; 4439 return -EINVAL;
4840 } 4440 }
4841 if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) { 4441 if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) {
4842 NL_SET_ERR_MSG(extack, 4442 NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port");
4843 "spectrum: Can not put a VLAN on an OVS port");
4844 return -EINVAL; 4443 return -EINVAL;
4845 } 4444 }
4846 break; 4445 break;
@@ -4953,7 +4552,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
4953 case NETDEV_PRECHANGEUPPER: 4552 case NETDEV_PRECHANGEUPPER:
4954 upper_dev = info->upper_dev; 4553 upper_dev = info->upper_dev;
4955 if (!netif_is_bridge_master(upper_dev)) { 4554 if (!netif_is_bridge_master(upper_dev)) {
4956 NL_SET_ERR_MSG(extack, "spectrum: VLAN devices only support bridge and VRF uppers"); 4555 NL_SET_ERR_MSG_MOD(extack, "VLAN devices only support bridge and VRF uppers");
4957 return -EINVAL; 4556 return -EINVAL;
4958 } 4557 }
4959 if (!info->linking) 4558 if (!info->linking)
@@ -4962,7 +4561,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
4962 (!netif_is_bridge_master(upper_dev) || 4561 (!netif_is_bridge_master(upper_dev) ||
4963 !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, 4562 !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
4964 upper_dev))) { 4563 upper_dev))) {
4965 NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); 4564 NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
4966 return -EINVAL; 4565 return -EINVAL;
4967 } 4566 }
4968 break; 4567 break;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index bdd8f94a452c..675e03a892ed 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -70,16 +70,23 @@
70#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR "linear" 70#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR "linear"
71#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE "hash_single" 71#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE "hash_single"
72#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE "hash_double" 72#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE "hash_double"
73#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES "singles"
74#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS "chunks"
75#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks"
73 76
74enum mlxsw_sp_resource_id { 77enum mlxsw_sp_resource_id {
75 MLXSW_SP_RESOURCE_KVD, 78 MLXSW_SP_RESOURCE_KVD,
76 MLXSW_SP_RESOURCE_KVD_LINEAR, 79 MLXSW_SP_RESOURCE_KVD_LINEAR,
77 MLXSW_SP_RESOURCE_KVD_HASH_SINGLE, 80 MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
78 MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE, 81 MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
82 MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
83 MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
84 MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
79}; 85};
80 86
81struct mlxsw_sp_port; 87struct mlxsw_sp_port;
82struct mlxsw_sp_rif; 88struct mlxsw_sp_rif;
89struct mlxsw_sp_span_entry;
83 90
84struct mlxsw_sp_upper { 91struct mlxsw_sp_upper {
85 struct net_device *dev; 92 struct net_device *dev;
@@ -111,25 +118,6 @@ struct mlxsw_sp_mid {
111 unsigned long *ports_in_mid; /* bits array */ 118 unsigned long *ports_in_mid; /* bits array */
112}; 119};
113 120
114enum mlxsw_sp_span_type {
115 MLXSW_SP_SPAN_EGRESS,
116 MLXSW_SP_SPAN_INGRESS
117};
118
119struct mlxsw_sp_span_inspected_port {
120 struct list_head list;
121 enum mlxsw_sp_span_type type;
122 u8 local_port;
123};
124
125struct mlxsw_sp_span_entry {
126 u8 local_port;
127 bool used;
128 struct list_head bound_ports_list;
129 int ref_count;
130 int id;
131};
132
133enum mlxsw_sp_port_mall_action_type { 121enum mlxsw_sp_port_mall_action_type {
134 MLXSW_SP_PORT_MALL_MIRROR, 122 MLXSW_SP_PORT_MALL_MIRROR,
135 MLXSW_SP_PORT_MALL_SAMPLE, 123 MLXSW_SP_PORT_MALL_SAMPLE,
@@ -396,16 +384,6 @@ struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev);
396struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); 384struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
397void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); 385void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port);
398struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev); 386struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev);
399int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
400 struct mlxsw_sp_port *to,
401 enum mlxsw_sp_span_type type,
402 bool bind);
403void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from,
404 u8 destination_port,
405 enum mlxsw_sp_span_type type,
406 bool bind);
407struct mlxsw_sp_span_entry *
408mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
409 387
410/* spectrum_dcb.c */ 388/* spectrum_dcb.c */
411#ifdef CONFIG_MLXSW_SPECTRUM_DCB 389#ifdef CONFIG_MLXSW_SPECTRUM_DCB
@@ -461,6 +439,7 @@ int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
461 unsigned int entry_count, 439 unsigned int entry_count,
462 unsigned int *p_alloc_size); 440 unsigned int *p_alloc_size);
463u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp); 441u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp);
442int mlxsw_sp_kvdl_resources_register(struct devlink *devlink);
464 443
465struct mlxsw_sp_acl_rule_info { 444struct mlxsw_sp_acl_rule_info {
466 unsigned int priority; 445 unsigned int priority;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
index 6ca6894125f0..f7e61cecc42b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -35,6 +35,7 @@
35 35
36#include "spectrum_acl_flex_actions.h" 36#include "spectrum_acl_flex_actions.h"
37#include "core_acl_flex_actions.h" 37#include "core_acl_flex_actions.h"
38#include "spectrum_span.h"
38 39
39#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1 40#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1
40 41
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 7502e53447bd..a1c4b1e63f8d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -37,122 +37,89 @@
37#include "spectrum_ipip.h" 37#include "spectrum_ipip.h"
38 38
39struct ip_tunnel_parm 39struct ip_tunnel_parm
40mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev) 40mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
41{ 41{
42 struct ip_tunnel *tun = netdev_priv(ol_dev); 42 struct ip_tunnel *tun = netdev_priv(ol_dev);
43 43
44 return tun->parms; 44 return tun->parms;
45} 45}
46 46
47static bool mlxsw_sp_ipip_parms_has_ikey(struct ip_tunnel_parm parms) 47static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
48{ 48{
49 return !!(parms.i_flags & TUNNEL_KEY); 49 return !!(parms.i_flags & TUNNEL_KEY);
50} 50}
51 51
52static bool mlxsw_sp_ipip_parms_has_okey(struct ip_tunnel_parm parms) 52static bool mlxsw_sp_ipip_parms4_has_okey(struct ip_tunnel_parm parms)
53{ 53{
54 return !!(parms.o_flags & TUNNEL_KEY); 54 return !!(parms.o_flags & TUNNEL_KEY);
55} 55}
56 56
57static u32 mlxsw_sp_ipip_parms_ikey(struct ip_tunnel_parm parms) 57static u32 mlxsw_sp_ipip_parms4_ikey(struct ip_tunnel_parm parms)
58{ 58{
59 return mlxsw_sp_ipip_parms_has_ikey(parms) ? 59 return mlxsw_sp_ipip_parms4_has_ikey(parms) ?
60 be32_to_cpu(parms.i_key) : 0; 60 be32_to_cpu(parms.i_key) : 0;
61} 61}
62 62
63static u32 mlxsw_sp_ipip_parms_okey(struct ip_tunnel_parm parms) 63static u32 mlxsw_sp_ipip_parms4_okey(struct ip_tunnel_parm parms)
64{ 64{
65 return mlxsw_sp_ipip_parms_has_okey(parms) ? 65 return mlxsw_sp_ipip_parms4_has_okey(parms) ?
66 be32_to_cpu(parms.o_key) : 0; 66 be32_to_cpu(parms.o_key) : 0;
67} 67}
68 68
69static __be32 mlxsw_sp_ipip_parms_saddr4(struct ip_tunnel_parm parms) 69static union mlxsw_sp_l3addr
70mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
70{ 71{
71 return parms.iph.saddr; 72 return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr };
72} 73}
73 74
74static union mlxsw_sp_l3addr 75static union mlxsw_sp_l3addr
75mlxsw_sp_ipip_parms_saddr(enum mlxsw_sp_l3proto proto, 76mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
76 struct ip_tunnel_parm parms)
77{ 77{
78 switch (proto) { 78 return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
79 case MLXSW_SP_L3_PROTO_IPV4:
80 return (union mlxsw_sp_l3addr) {
81 .addr4 = mlxsw_sp_ipip_parms_saddr4(parms),
82 };
83 case MLXSW_SP_L3_PROTO_IPV6:
84 break;
85 }
86
87 WARN_ON(1);
88 return (union mlxsw_sp_l3addr) {
89 .addr4 = 0,
90 };
91} 79}
92 80
93static __be32 mlxsw_sp_ipip_parms_daddr4(struct ip_tunnel_parm parms) 81union mlxsw_sp_l3addr
82mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
83 const struct net_device *ol_dev)
94{ 84{
95 return parms.iph.daddr; 85 struct ip_tunnel_parm parms4;
96}
97 86
98static union mlxsw_sp_l3addr
99mlxsw_sp_ipip_parms_daddr(enum mlxsw_sp_l3proto proto,
100 struct ip_tunnel_parm parms)
101{
102 switch (proto) { 87 switch (proto) {
103 case MLXSW_SP_L3_PROTO_IPV4: 88 case MLXSW_SP_L3_PROTO_IPV4:
104 return (union mlxsw_sp_l3addr) { 89 parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
105 .addr4 = mlxsw_sp_ipip_parms_daddr4(parms), 90 return mlxsw_sp_ipip_parms4_saddr(parms4);
106 };
107 case MLXSW_SP_L3_PROTO_IPV6: 91 case MLXSW_SP_L3_PROTO_IPV6:
108 break; 92 break;
109 } 93 }
110 94
111 WARN_ON(1); 95 WARN_ON(1);
112 return (union mlxsw_sp_l3addr) { 96 return (union mlxsw_sp_l3addr) {0};
113 .addr4 = 0,
114 };
115}
116
117static bool mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
118{
119 return mlxsw_sp_ipip_parms_has_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
120}
121
122static bool mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
123{
124 return mlxsw_sp_ipip_parms_has_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
125} 97}
126 98
127static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev) 99static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
128{ 100{
129 return mlxsw_sp_ipip_parms_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
130}
131 101
132static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev) 102 struct ip_tunnel_parm parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
133{
134 return mlxsw_sp_ipip_parms_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
135}
136 103
137union mlxsw_sp_l3addr 104 return mlxsw_sp_ipip_parms4_daddr(parms4).addr4;
138mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
139 const struct net_device *ol_dev)
140{
141 return mlxsw_sp_ipip_parms_saddr(proto,
142 mlxsw_sp_ipip_netdev_parms(ol_dev));
143}
144
145static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
146{
147 return mlxsw_sp_ipip_parms_daddr4(mlxsw_sp_ipip_netdev_parms(ol_dev));
148} 105}
149 106
150static union mlxsw_sp_l3addr 107static union mlxsw_sp_l3addr
151mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, 108mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
152 const struct net_device *ol_dev) 109 const struct net_device *ol_dev)
153{ 110{
154 return mlxsw_sp_ipip_parms_daddr(proto, 111 struct ip_tunnel_parm parms4;
155 mlxsw_sp_ipip_netdev_parms(ol_dev)); 112
113 switch (proto) {
114 case MLXSW_SP_L3_PROTO_IPV4:
115 parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
116 return mlxsw_sp_ipip_parms4_daddr(parms4);
117 case MLXSW_SP_L3_PROTO_IPV6:
118 break;
119 }
120
121 WARN_ON(1);
122 return (union mlxsw_sp_l3addr) {0};
156} 123}
157 124
158static int 125static int
@@ -176,12 +143,17 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
176 u32 tunnel_index, 143 u32 tunnel_index,
177 struct mlxsw_sp_ipip_entry *ipip_entry) 144 struct mlxsw_sp_ipip_entry *ipip_entry)
178{ 145{
179 bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev);
180 u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); 146 u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
181 u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev);
182 char rtdp_pl[MLXSW_REG_RTDP_LEN]; 147 char rtdp_pl[MLXSW_REG_RTDP_LEN];
148 struct ip_tunnel_parm parms;
183 unsigned int type_check; 149 unsigned int type_check;
150 bool has_ikey;
184 u32 daddr4; 151 u32 daddr4;
152 u32 ikey;
153
154 parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
155 has_ikey = mlxsw_sp_ipip_parms4_has_ikey(parms);
156 ikey = mlxsw_sp_ipip_parms4_ikey(parms);
185 157
186 mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index); 158 mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
187 159
@@ -273,14 +245,15 @@ static struct mlxsw_sp_rif_ipip_lb_config
273mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp, 245mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
274 const struct net_device *ol_dev) 246 const struct net_device *ol_dev)
275{ 247{
248 struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev);
276 enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; 249 enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
277 250
278 lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ? 251 lb_ipipt = mlxsw_sp_ipip_parms4_has_okey(parms) ?
279 MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP : 252 MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP :
280 MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP; 253 MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP;
281 return (struct mlxsw_sp_rif_ipip_lb_config){ 254 return (struct mlxsw_sp_rif_ipip_lb_config){
282 .lb_ipipt = lb_ipipt, 255 .lb_ipipt = lb_ipipt,
283 .okey = mlxsw_sp_ipip_netdev_okey(ol_dev), 256 .okey = mlxsw_sp_ipip_parms4_okey(parms),
284 .ul_protocol = MLXSW_SP_L3_PROTO_IPV4, 257 .ul_protocol = MLXSW_SP_L3_PROTO_IPV4,
285 .saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4, 258 .saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
286 ol_dev), 259 ol_dev),
@@ -300,16 +273,12 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
300 bool update_nhs = false; 273 bool update_nhs = false;
301 int err = 0; 274 int err = 0;
302 275
303 new_parms = mlxsw_sp_ipip_netdev_parms(ipip_entry->ol_dev); 276 new_parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
304 277
305 new_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4, 278 new_saddr = mlxsw_sp_ipip_parms4_saddr(new_parms);
306 new_parms); 279 old_saddr = mlxsw_sp_ipip_parms4_saddr(ipip_entry->parms4);
307 old_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4, 280 new_daddr = mlxsw_sp_ipip_parms4_daddr(new_parms);
308 ipip_entry->parms); 281 old_daddr = mlxsw_sp_ipip_parms4_daddr(ipip_entry->parms4);
309 new_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
310 new_parms);
311 old_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
312 ipip_entry->parms);
313 282
314 if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) { 283 if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) {
315 u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev); 284 u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
@@ -326,14 +295,14 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
326 } 295 }
327 296
328 update_tunnel = true; 297 update_tunnel = true;
329 } else if ((mlxsw_sp_ipip_parms_okey(ipip_entry->parms) != 298 } else if ((mlxsw_sp_ipip_parms4_okey(ipip_entry->parms4) !=
330 mlxsw_sp_ipip_parms_okey(new_parms)) || 299 mlxsw_sp_ipip_parms4_okey(new_parms)) ||
331 ipip_entry->parms.link != new_parms.link) { 300 ipip_entry->parms4.link != new_parms.link) {
332 update_tunnel = true; 301 update_tunnel = true;
333 } else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) { 302 } else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) {
334 update_nhs = true; 303 update_nhs = true;
335 } else if (mlxsw_sp_ipip_parms_ikey(ipip_entry->parms) != 304 } else if (mlxsw_sp_ipip_parms4_ikey(ipip_entry->parms4) !=
336 mlxsw_sp_ipip_parms_ikey(new_parms)) { 305 mlxsw_sp_ipip_parms4_ikey(new_parms)) {
337 update_decap = true; 306 update_decap = true;
338 } 307 }
339 308
@@ -350,7 +319,7 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
350 false, false, false, 319 false, false, false,
351 extack); 320 extack);
352 321
353 ipip_entry->parms = new_parms; 322 ipip_entry->parms4 = new_parms;
354 return err; 323 return err;
355} 324}
356 325
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 04b08d9d76e9..a4ff5737eccc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -37,9 +37,10 @@
37 37
38#include "spectrum_router.h" 38#include "spectrum_router.h"
39#include <net/ip_fib.h> 39#include <net/ip_fib.h>
40#include <linux/if_tunnel.h>
40 41
41struct ip_tunnel_parm 42struct ip_tunnel_parm
42mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev); 43mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev);
43 44
44union mlxsw_sp_l3addr 45union mlxsw_sp_l3addr
45mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, 46mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
@@ -56,7 +57,9 @@ struct mlxsw_sp_ipip_entry {
56 struct mlxsw_sp_rif_ipip_lb *ol_lb; 57 struct mlxsw_sp_rif_ipip_lb *ol_lb;
57 struct mlxsw_sp_fib_entry *decap_fib_entry; 58 struct mlxsw_sp_fib_entry *decap_fib_entry;
58 struct list_head ipip_list_node; 59 struct list_head ipip_list_node;
59 struct ip_tunnel_parm parms; 60 union {
61 struct ip_tunnel_parm parms4;
62 };
60}; 63};
61 64
62struct mlxsw_sp_ipip_ops { 65struct mlxsw_sp_ipip_ops {
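The single-member union above deliberately leaves room for per-protocol tunnel parameters ahead of IPv6 offload, matching the MLXSW_SP_L3_PROTO_IPV6 switch arms elsewhere in the series that currently just WARN. A hypothetical sketch of how an IPv6 member could slot in (parms6 is an assumption, not part of this series):

	union {
		struct ip_tunnel_parm parms4;	/* IPv4 tunnels, set in this series */
		struct __ip6_tnl_parm parms6;	/* hypothetical; from <net/ip6_tunnel.h> */
	};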
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 55f9d2d70f9e..d27fa57ad3c3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -67,7 +67,7 @@ struct mlxsw_sp_kvdl_part_info {
67 67
68struct mlxsw_sp_kvdl_part { 68struct mlxsw_sp_kvdl_part {
69 struct list_head list; 69 struct list_head list;
70 const struct mlxsw_sp_kvdl_part_info *info; 70 struct mlxsw_sp_kvdl_part_info *info;
71 unsigned long usage[0]; /* Entries */ 71 unsigned long usage[0]; /* Entries */
72}; 72};
73 73
@@ -188,21 +188,27 @@ int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
188 return 0; 188 return 0;
189} 189}
190 190
191enum mlxsw_sp_kvdl_part_id {
192 MLXSW_SP_KVDL_PART_SINGLE,
193 MLXSW_SP_KVDL_PART_CHUNKS,
194 MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
195};
196
191static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = { 197static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = {
192 { 198 {
193 .part_index = 0, 199 .part_index = MLXSW_SP_KVDL_PART_SINGLE,
194 .start_index = MLXSW_SP_KVDL_SINGLE_BASE, 200 .start_index = MLXSW_SP_KVDL_SINGLE_BASE,
195 .end_index = MLXSW_SP_KVDL_SINGLE_END, 201 .end_index = MLXSW_SP_KVDL_SINGLE_END,
196 .alloc_size = 1, 202 .alloc_size = 1,
197 }, 203 },
198 { 204 {
199 .part_index = 1, 205 .part_index = MLXSW_SP_KVDL_PART_CHUNKS,
200 .start_index = MLXSW_SP_KVDL_CHUNKS_BASE, 206 .start_index = MLXSW_SP_KVDL_CHUNKS_BASE,
201 .end_index = MLXSW_SP_KVDL_CHUNKS_END, 207 .end_index = MLXSW_SP_KVDL_CHUNKS_END,
202 .alloc_size = MLXSW_SP_CHUNK_MAX, 208 .alloc_size = MLXSW_SP_CHUNK_MAX,
203 }, 209 },
204 { 210 {
205 .part_index = 2, 211 .part_index = MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
206 .start_index = MLXSW_SP_KVDL_LARGE_CHUNKS_BASE, 212 .start_index = MLXSW_SP_KVDL_LARGE_CHUNKS_BASE,
207 .end_index = MLXSW_SP_KVDL_LARGE_CHUNKS_END, 213 .end_index = MLXSW_SP_KVDL_LARGE_CHUNKS_END,
208 .alloc_size = MLXSW_SP_LARGE_CHUNK_MAX, 214 .alloc_size = MLXSW_SP_LARGE_CHUNK_MAX,
@@ -222,27 +228,74 @@ mlxsw_sp_kvdl_part_find(struct mlxsw_sp *mlxsw_sp, unsigned int part_index)
222 return NULL; 228 return NULL;
223} 229}
224 230
231static void
232mlxsw_sp_kvdl_part_update(struct mlxsw_sp *mlxsw_sp,
233 struct mlxsw_sp_kvdl_part *part, unsigned int size)
234{
235 struct mlxsw_sp_kvdl_part_info *info = part->info;
236
237 if (list_is_last(&part->list, &mlxsw_sp->kvdl->parts_list)) {
238 info->end_index = size - 1;
239 } else {
240 struct mlxsw_sp_kvdl_part *last_part;
241
242 last_part = list_next_entry(part, list);
243 info->start_index = last_part->info->end_index + 1;
244 info->end_index = info->start_index + size - 1;
245 }
246}
247
225static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp, 248static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
226 unsigned int part_index) 249 unsigned int part_index)
227{ 250{
251 struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
228 const struct mlxsw_sp_kvdl_part_info *info; 252 const struct mlxsw_sp_kvdl_part_info *info;
253 enum mlxsw_sp_resource_id resource_id;
229 struct mlxsw_sp_kvdl_part *part; 254 struct mlxsw_sp_kvdl_part *part;
255 bool need_update = true;
230 unsigned int nr_entries; 256 unsigned int nr_entries;
231 size_t usage_size; 257 size_t usage_size;
258 u64 resource_size;
259 int err;
232 260
233 info = &kvdl_parts_info[part_index]; 261 info = &kvdl_parts_info[part_index];
234 262
235 nr_entries = (info->end_index - info->start_index + 1) / 263 switch (part_index) {
236 info->alloc_size; 264 case MLXSW_SP_KVDL_PART_SINGLE:
265 resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE;
266 break;
267 case MLXSW_SP_KVDL_PART_CHUNKS:
268 resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS;
269 break;
270 case MLXSW_SP_KVDL_PART_LARGE_CHUNKS:
271 resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS;
272 break;
273 }
274
275 err = devlink_resource_size_get(devlink, resource_id, &resource_size);
276 if (err) {
277 need_update = false;
278 resource_size = info->end_index - info->start_index + 1;
279 }
280
281 nr_entries = resource_size / info->alloc_size;
237 usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long); 282 usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
238 part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL); 283 part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
239 if (!part) 284 if (!part)
240 return -ENOMEM; 285 return -ENOMEM;
241 286
242 part->info = info; 287 part->info = kmemdup(info, sizeof(*part->info), GFP_KERNEL);
243 list_add(&part->list, &mlxsw_sp->kvdl->parts_list); 288 if (!part->info)
289 goto err_part_info_alloc;
244 290
291 list_add(&part->list, &mlxsw_sp->kvdl->parts_list);
292 if (need_update)
293 mlxsw_sp_kvdl_part_update(mlxsw_sp, part, resource_size);
245 return 0; 294 return 0;
295
296err_part_info_alloc:
297 kfree(part);
298 return -ENOMEM;
246} 299}
247 300
248static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp, 301static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
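Because mlxsw_sp_kvdl_part_init() prepends each part with list_add(), the singles part is the sole list entry when it is updated and remains last thereafter, so it anchors the layout at the base of the linear space; every later part starts right after the end_index of its list_next_entry(), i.e. the part initialized before it. A worked example of the re-layout with illustrative (non-default) sizes, assuming the singles part is based at index 0 as MLXSW_SP_KVDL_SINGLE_BASE suggests:

	/* singles      resized to 16384 entries -> [0     .. 16383]
	 * chunks       resized to 49152 entries -> [16384 .. 65535]
	 * large_chunks resized to 65536 entries -> [65536 .. 131071]
	 */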
@@ -255,6 +308,7 @@ static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
255 return; 308 return;
256 309
257 list_del(&part->list); 310 list_del(&part->list);
311 kfree(part->info);
258 kfree(part); 312 kfree(part);
259} 313}
260 314
@@ -312,6 +366,123 @@ u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp)
312 return occ; 366 return occ;
313} 367}
314 368
369u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
370{
371 struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
372 struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
373 struct mlxsw_sp_kvdl_part *part;
374
375 part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_SINGLE);
376 if (!part)
377 return -EINVAL;
378
379 return mlxsw_sp_kvdl_part_occ(part);
380}
381
382u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
383{
384 struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
385 struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
386 struct mlxsw_sp_kvdl_part *part;
387
388 part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_CHUNKS);
389 if (!part)
390 return -EINVAL;
391
392 return mlxsw_sp_kvdl_part_occ(part);
393}
394
395u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
396{
397 struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
398 struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
399 struct mlxsw_sp_kvdl_part *part;
400
401 part = mlxsw_sp_kvdl_part_find(mlxsw_sp,
402 MLXSW_SP_KVDL_PART_LARGE_CHUNKS);
403 if (!part)
404 return -EINVAL;
405
406 return mlxsw_sp_kvdl_part_occ(part);
407}
408
409static struct devlink_resource_ops mlxsw_sp_kvdl_single_ops = {
410 .occ_get = mlxsw_sp_kvdl_single_occ_get,
411};
412
413static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_ops = {
414 .occ_get = mlxsw_sp_kvdl_chunks_occ_get,
415};
416
417static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_large_ops = {
418 .occ_get = mlxsw_sp_kvdl_large_chunks_occ_get,
419};
420
421static struct devlink_resource_size_params mlxsw_sp_kvdl_single_size_params = {
422 .size_min = 0,
423 .size_granularity = 1,
424 .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
425};
426
427static struct devlink_resource_size_params mlxsw_sp_kvdl_chunks_size_params = {
428 .size_min = 0,
429 .size_granularity = MLXSW_SP_CHUNK_MAX,
430 .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
431};
432
433static struct devlink_resource_size_params mlxsw_sp_kvdl_large_chunks_size_params = {
434 .size_min = 0,
435 .size_granularity = MLXSW_SP_LARGE_CHUNK_MAX,
436 .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
437};
438
439static void
440mlxsw_sp_kvdl_resource_size_params_prepare(struct devlink *devlink)
441{
442 struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
443 u32 kvdl_max_size;
444
445 kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
446 MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
447 MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
448
449 mlxsw_sp_kvdl_single_size_params.size_max = kvdl_max_size;
450 mlxsw_sp_kvdl_chunks_size_params.size_max = kvdl_max_size;
451 mlxsw_sp_kvdl_large_chunks_size_params.size_max = kvdl_max_size;
452}
453
454int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
455{
456 int err;
457
458 mlxsw_sp_kvdl_resource_size_params_prepare(devlink);
459 err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
460 false, MLXSW_SP_KVDL_SINGLE_SIZE,
461 MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
462 MLXSW_SP_RESOURCE_KVD_LINEAR,
463 &mlxsw_sp_kvdl_single_size_params,
464 &mlxsw_sp_kvdl_single_ops);
465 if (err)
466 return err;
467
468 err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
469 false, MLXSW_SP_KVDL_CHUNKS_SIZE,
470 MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
471 MLXSW_SP_RESOURCE_KVD_LINEAR,
472 &mlxsw_sp_kvdl_chunks_size_params,
473 &mlxsw_sp_kvdl_chunks_ops);
474 if (err)
475 return err;
476
477 err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
478 false, MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
479 MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
480 MLXSW_SP_RESOURCE_KVD_LINEAR,
481 &mlxsw_sp_kvdl_large_chunks_size_params,
482 &mlxsw_sp_kvdl_chunks_large_ops);
483 return err;
484}
485
315int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp) 486int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
316{ 487{
317 struct mlxsw_sp_kvdl *kvdl; 488 struct mlxsw_sp_kvdl *kvdl;
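Together with the MLXSW_SP_RESOURCE_NAME_* strings added to spectrum.h above, the three devlink_resource_register() calls hang the new resources under the existing kvd/linear node, each with an occ_get callback for occupancy reporting. The resulting tree, as a sketch:

	/* kvd
	 * `-- linear
	 *     |-- singles        size granularity 1
	 *     |-- chunks         size granularity MLXSW_SP_CHUNK_MAX
	 *     `-- large_chunks   size granularity MLXSW_SP_LARGE_CHUNK_MAX
	 */

Presumably these can then be inspected and resized through the generic devlink resource interface, with new sizes picked up by mlxsw_sp_kvdl_part_init() on the next reload via devlink_resource_size_get().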
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index f0b25baba09a..05146970c19c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1,10 +1,10 @@
1/* 1/*
2 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 2 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3 * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved. 3 * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> 4 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com> 5 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> 6 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7 * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> 7 * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
8 * 8 *
9 * Redistribution and use in source and binary forms, with or without 9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions are met: 10 * modification, are permitted provided that the following conditions are met:
@@ -788,37 +788,41 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
788 u32 tb_id, 788 u32 tb_id,
789 struct netlink_ext_ack *extack) 789 struct netlink_ext_ack *extack)
790{ 790{
791 struct mlxsw_sp_mr_table *mr4_table;
792 struct mlxsw_sp_fib *fib4;
793 struct mlxsw_sp_fib *fib6;
791 struct mlxsw_sp_vr *vr; 794 struct mlxsw_sp_vr *vr;
792 int err; 795 int err;
793 796
794 vr = mlxsw_sp_vr_find_unused(mlxsw_sp); 797 vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
795 if (!vr) { 798 if (!vr) {
796 NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers"); 799 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
797 return ERR_PTR(-EBUSY); 800 return ERR_PTR(-EBUSY);
798 } 801 }
799 vr->fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); 802 fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
800 if (IS_ERR(vr->fib4)) 803 if (IS_ERR(fib4))
801 return ERR_CAST(vr->fib4); 804 return ERR_CAST(fib4);
802 vr->fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6); 805 fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
803 if (IS_ERR(vr->fib6)) { 806 if (IS_ERR(fib6)) {
804 err = PTR_ERR(vr->fib6); 807 err = PTR_ERR(fib6);
805 goto err_fib6_create; 808 goto err_fib6_create;
806 } 809 }
807 vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id, 810 mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
808 MLXSW_SP_L3_PROTO_IPV4); 811 MLXSW_SP_L3_PROTO_IPV4);
809 if (IS_ERR(vr->mr4_table)) { 812 if (IS_ERR(mr4_table)) {
810 err = PTR_ERR(vr->mr4_table); 813 err = PTR_ERR(mr4_table);
811 goto err_mr_table_create; 814 goto err_mr_table_create;
812 } 815 }
816 vr->fib4 = fib4;
817 vr->fib6 = fib6;
818 vr->mr4_table = mr4_table;
813 vr->tb_id = tb_id; 819 vr->tb_id = tb_id;
814 return vr; 820 return vr;
815 821
816err_mr_table_create: 822err_mr_table_create:
817 mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6); 823 mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
818 vr->fib6 = NULL;
819err_fib6_create: 824err_fib6_create:
820 mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4); 825 mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
821 vr->fib4 = NULL;
822 return ERR_PTR(err); 826 return ERR_PTR(err);
823} 827}
824 828
@@ -1020,9 +1024,11 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1020 enum mlxsw_sp_ipip_type ipipt, 1024 enum mlxsw_sp_ipip_type ipipt,
1021 struct net_device *ol_dev) 1025 struct net_device *ol_dev)
1022{ 1026{
1027 const struct mlxsw_sp_ipip_ops *ipip_ops;
1023 struct mlxsw_sp_ipip_entry *ipip_entry; 1028 struct mlxsw_sp_ipip_entry *ipip_entry;
1024 struct mlxsw_sp_ipip_entry *ret = NULL; 1029 struct mlxsw_sp_ipip_entry *ret = NULL;
1025 1030
1031 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1026 ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL); 1032 ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1027 if (!ipip_entry) 1033 if (!ipip_entry)
1028 return ERR_PTR(-ENOMEM); 1034 return ERR_PTR(-ENOMEM);
@@ -1036,7 +1042,15 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1036 1042
1037 ipip_entry->ipipt = ipipt; 1043 ipip_entry->ipipt = ipipt;
1038 ipip_entry->ol_dev = ol_dev; 1044 ipip_entry->ol_dev = ol_dev;
1039 ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev); 1045
1046 switch (ipip_ops->ul_proto) {
1047 case MLXSW_SP_L3_PROTO_IPV4:
1048 ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1049 break;
1050 case MLXSW_SP_L3_PROTO_IPV6:
1051 WARN_ON(1);
1052 break;
1053 }
1040 1054
1041 return ipip_entry; 1055 return ipip_entry;
1042 1056
@@ -3790,6 +3804,9 @@ mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3790 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; 3804 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3791 int i; 3805 int i;
3792 3806
3807 if (!list_is_singular(&nh_grp->fib_list))
3808 return;
3809
3793 for (i = 0; i < nh_grp->count; i++) { 3810 for (i = 0; i < nh_grp->count; i++) {
3794 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; 3811 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3795 3812
@@ -5786,7 +5803,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5786 } 5803 }
5787 5804
5788 if (err < 0) 5805 if (err < 0)
5789 NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload"); 5806 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported. Aborting offload");
5790 5807
5791 return err; 5808 return err;
5792} 5809}
@@ -6025,7 +6042,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6025 6042
6026 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); 6043 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6027 if (err) { 6044 if (err) {
6028 NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces"); 6045 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6029 goto err_rif_index_alloc; 6046 goto err_rif_index_alloc;
6030 } 6047 }
6031 6048
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
new file mode 100644
index 000000000000..c3bec37d71ed
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -0,0 +1,356 @@
1/*
2 * drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
3 * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the names of the copyright holders nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
16 *
17 * Alternatively, this software may be distributed under the terms of the
18 * GNU General Public License ("GPL") version 2 as published by the Free
19 * Software Foundation.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <linux/list.h>
35
36#include "spectrum.h"
37#include "spectrum_span.h"
38
39int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
40{
41 int i;
42
43 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
44 return -EIO;
45
46 mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
47 MAX_SPAN);
48 mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
49 sizeof(struct mlxsw_sp_span_entry),
50 GFP_KERNEL);
51 if (!mlxsw_sp->span.entries)
52 return -ENOMEM;
53
54 for (i = 0; i < mlxsw_sp->span.entries_count; i++)
55 INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list);
56
57 return 0;
58}
59
60void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
61{
62 int i;
63
64 for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
65 struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
66
67 WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
68 }
69 kfree(mlxsw_sp->span.entries);
70}
71
72static struct mlxsw_sp_span_entry *
73mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
74{
75 struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
76 struct mlxsw_sp_span_entry *span_entry;
77 char mpat_pl[MLXSW_REG_MPAT_LEN];
78 u8 local_port = port->local_port;
79 int index;
80 int i;
81 int err;
82
83 /* find a free entry to use */
84 index = -1;
85 for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
86 if (!mlxsw_sp->span.entries[i].ref_count) {
87 index = i;
88 span_entry = &mlxsw_sp->span.entries[i];
89 break;
90 }
91 }
92 if (index < 0)
93 return NULL;
94
95 /* create a new port analyzer entry for local_port */
96 mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true);
97 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
98 if (err)
99 return NULL;
100
101 span_entry->id = index;
102 span_entry->ref_count = 1;
103 span_entry->local_port = local_port;
104 return span_entry;
105}
106
107static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
108 struct mlxsw_sp_span_entry *span_entry)
109{
110 u8 local_port = span_entry->local_port;
111 char mpat_pl[MLXSW_REG_MPAT_LEN];
112 int pa_id = span_entry->id;
113
114 mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
115 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
116}
117
118struct mlxsw_sp_span_entry *
119mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
120{
121 int i;
122
123 for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
124 struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
125
126 if (curr->ref_count && curr->local_port == local_port)
127 return curr;
128 }
129 return NULL;
130}
131
132static struct mlxsw_sp_span_entry *
133mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
134{
135 struct mlxsw_sp_span_entry *span_entry;
136
137 span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
138 port->local_port);
139 if (span_entry) {
140 /* Already exists, just take a reference */
141 span_entry->ref_count++;
142 return span_entry;
143 }
144
145 return mlxsw_sp_span_entry_create(port);
146}
147
148static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
149 struct mlxsw_sp_span_entry *span_entry)
150{
151 WARN_ON(!span_entry->ref_count);
152 if (--span_entry->ref_count == 0)
153 mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
154 return 0;
155}
156
157static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
158{
159 struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
160 struct mlxsw_sp_span_inspected_port *p;
161 int i;
162
163 for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
164 struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
165
166 list_for_each_entry(p, &curr->bound_ports_list, list)
167 if (p->local_port == port->local_port &&
168 p->type == MLXSW_SP_SPAN_EGRESS)
169 return true;
170 }
171
172 return false;
173}
174
175static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
176 int mtu)
177{
178 return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
179}
180
181int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
182{
183 struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
184 char sbib_pl[MLXSW_REG_SBIB_LEN];
185 int err;
186
187 /* If the port is egress mirrored, the shared buffer size should be
188 * updated according to the MTU value
189 */
190 if (mlxsw_sp_span_is_egress_mirror(port)) {
191 u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
192
193 mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
194 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
195 if (err) {
196 netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
197 return err;
198 }
199 }
200
201 return 0;
202}
203
204static struct mlxsw_sp_span_inspected_port *
205mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port,
206 struct mlxsw_sp_span_entry *span_entry)
207{
208 struct mlxsw_sp_span_inspected_port *p;
209
210 list_for_each_entry(p, &span_entry->bound_ports_list, list)
211 if (port->local_port == p->local_port)
212 return p;
213 return NULL;
214}
215
216static int
217mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
218 struct mlxsw_sp_span_entry *span_entry,
219 enum mlxsw_sp_span_type type,
220 bool bind)
221{
222 struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
223 char mpar_pl[MLXSW_REG_MPAR_LEN];
224 int pa_id = span_entry->id;
225
226 /* bind the port to the SPAN entry */
227 mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
228 (enum mlxsw_reg_mpar_i_e)type, bind, pa_id);
229 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
230}
231
232static int
233mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
234 struct mlxsw_sp_span_entry *span_entry,
235 enum mlxsw_sp_span_type type,
236 bool bind)
237{
238 struct mlxsw_sp_span_inspected_port *inspected_port;
239 struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
240 char sbib_pl[MLXSW_REG_SBIB_LEN];
241 int err;
242
243 /* if it is an egress SPAN, bind a shared buffer to it */
244 if (type == MLXSW_SP_SPAN_EGRESS) {
245 u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
246 port->dev->mtu);
247
248 mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
249 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
250 if (err) {
251 netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
252 return err;
253 }
254 }
255
256 if (bind) {
257 err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
258 true);
259 if (err)
260 goto err_port_bind;
261 }
262
263 inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
264 if (!inspected_port) {
265 err = -ENOMEM;
266 goto err_inspected_port_alloc;
267 }
268 inspected_port->local_port = port->local_port;
269 inspected_port->type = type;
270 list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
271
272 return 0;
273
274err_inspected_port_alloc:
275 if (bind)
276 mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
277 false);
278err_port_bind:
279 if (type == MLXSW_SP_SPAN_EGRESS) {
280 mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
281 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
282 }
283 return err;
284}
285
286static void
287mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
288 struct mlxsw_sp_span_entry *span_entry,
289 enum mlxsw_sp_span_type type,
290 bool bind)
291{
292 struct mlxsw_sp_span_inspected_port *inspected_port;
293 struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
294 char sbib_pl[MLXSW_REG_SBIB_LEN];
295
296 inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry);
297 if (!inspected_port)
298 return;
299
300 if (bind)
301 mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
302 false);
303 /* remove the SBIB buffer if it was an egress SPAN */
304 if (type == MLXSW_SP_SPAN_EGRESS) {
305 mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
306 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
307 }
308
309 mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
310
311 list_del(&inspected_port->list);
312 kfree(inspected_port);
313}
314
315int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
316 struct mlxsw_sp_port *to,
317 enum mlxsw_sp_span_type type, bool bind)
318{
319 struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
320 struct mlxsw_sp_span_entry *span_entry;
321 int err;
322
323 span_entry = mlxsw_sp_span_entry_get(to);
324 if (!span_entry)
325 return -ENOENT;
326
327 netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
328 span_entry->id);
329
330 err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
331 if (err)
332 goto err_port_bind;
333
334 return 0;
335
336err_port_bind:
337 mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
338 return err;
339}
340
341void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
342 enum mlxsw_sp_span_type type, bool bind)
343{
344 struct mlxsw_sp_span_entry *span_entry;
345
346 span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
347 destination_port);
348 if (!span_entry) {
349 netdev_err(from->dev, "no span entry found\n");
350 return;
351 }
352
353 netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
354 span_entry->id);
355 mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
356}
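mlxsw_sp_span_mtu_to_buffsize() reserves two and a half MTUs' worth of shared buffer, rounded up to cells, plus one cell of slack. A worked example, assuming a 96-byte cell (the cell size is queried from the device at runtime, so this is illustrative) and that mlxsw_sp_bytes_cells() rounds up:

	/* mtu = 1500:
	 *   mtu * 5 / 2            = 3750 bytes
	 *   mlxsw_sp_bytes_cells() = DIV_ROUND_UP(3750, 96) = 40 cells
	 *   buffsize               = 40 + 1 = 41 cells
	 */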
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
new file mode 100644
index 000000000000..069050e385ff
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -0,0 +1,73 @@
1/*
2 * drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
3 * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the names of the copyright holders nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
16 *
17 * Alternatively, this software may be distributed under the terms of the
18 * GNU General Public License ("GPL") version 2 as published by the Free
19 * Software Foundation.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#ifndef _MLXSW_SPECTRUM_SPAN_H
35#define _MLXSW_SPECTRUM_SPAN_H
36
37#include <linux/types.h>
38
39struct mlxsw_sp;
40struct mlxsw_sp_port;
41
42enum mlxsw_sp_span_type {
43 MLXSW_SP_SPAN_EGRESS,
44 MLXSW_SP_SPAN_INGRESS
45};
46
47struct mlxsw_sp_span_inspected_port {
48 struct list_head list;
49 enum mlxsw_sp_span_type type;
50 u8 local_port;
51};
52
53struct mlxsw_sp_span_entry {
54 u8 local_port;
55 struct list_head bound_ports_list;
56 int ref_count;
57 int id;
58};
59
60int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
61void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
62
63int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
64 struct mlxsw_sp_port *to,
65 enum mlxsw_sp_span_type type, bool bind);
66void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
67 enum mlxsw_sp_span_type type, bool bind);
68struct mlxsw_sp_span_entry *
69mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
70
71int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
72
73#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 593ad31be749..f9f53af04fe1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1819,7 +1819,7 @@ mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device,
1819 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; 1819 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
1820 1820
1821 if (is_vlan_dev(bridge_port->dev)) { 1821 if (is_vlan_dev(bridge_port->dev)) {
1822 NL_SET_ERR_MSG(extack, "spectrum: Can not enslave a VLAN device to a VLAN-aware bridge"); 1822 NL_SET_ERR_MSG_MOD(extack, "Can not enslave a VLAN device to a VLAN-aware bridge");
1823 return -EINVAL; 1823 return -EINVAL;
1824 } 1824 }
1825 1825
@@ -1885,7 +1885,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
1885 u16 vid; 1885 u16 vid;
1886 1886
1887 if (!is_vlan_dev(bridge_port->dev)) { 1887 if (!is_vlan_dev(bridge_port->dev)) {
1888 NL_SET_ERR_MSG(extack, "spectrum: Only VLAN devices can be enslaved to a VLAN-unaware bridge"); 1888 NL_SET_ERR_MSG_MOD(extack, "Only VLAN devices can be enslaved to a VLAN-unaware bridge");
1889 return -EINVAL; 1889 return -EINVAL;
1890 } 1890 }
1891 vid = vlan_dev_vlan_id(bridge_port->dev); 1891 vid = vlan_dev_vlan_id(bridge_port->dev);
@@ -1895,7 +1895,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
1895 return -EINVAL; 1895 return -EINVAL;
1896 1896
1897 if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) { 1897 if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) {
1898 NL_SET_ERR_MSG(extack, "spectrum: Can not bridge VLAN uppers of the same port"); 1898 NL_SET_ERR_MSG_MOD(extack, "Can not bridge VLAN uppers of the same port");
1899 return -EINVAL; 1899 return -EINVAL;
1900 } 1900 }
1901 1901
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index adfe474c2cf0..28c1cd5b823b 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -61,6 +61,13 @@
61#define NFP_FLOWER_MASK_MPLS_BOS BIT(8) 61#define NFP_FLOWER_MASK_MPLS_BOS BIT(8)
62#define NFP_FLOWER_MASK_MPLS_Q BIT(0) 62#define NFP_FLOWER_MASK_MPLS_Q BIT(0)
63 63
64/* Compressed HW representation of TCP Flags */
65#define NFP_FL_TCP_FLAG_URG BIT(4)
66#define NFP_FL_TCP_FLAG_PSH BIT(3)
67#define NFP_FL_TCP_FLAG_RST BIT(2)
68#define NFP_FL_TCP_FLAG_SYN BIT(1)
69#define NFP_FL_TCP_FLAG_FIN BIT(0)
70
64#define NFP_FL_SC_ACT_DROP 0x80000000 71#define NFP_FL_SC_ACT_DROP 0x80000000
65#define NFP_FL_SC_ACT_USER 0x7D000000 72#define NFP_FL_SC_ACT_USER 0x7D000000
66#define NFP_FL_SC_ACT_POPV 0x6A000000 73#define NFP_FL_SC_ACT_POPV 0x6A000000
@@ -257,7 +264,7 @@ struct nfp_flower_tp_ports {
257 * 3 2 1 264 * 3 2 1
258 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 265 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
259 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 266 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
260 * | DSCP |ECN| protocol | reserved | 267 * | DSCP |ECN| protocol | ttl | flags |
261 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 268 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
262 * | ipv4_addr_src | 269 * | ipv4_addr_src |
263 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 270 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -268,7 +275,7 @@ struct nfp_flower_ipv4 {
268 u8 tos; 275 u8 tos;
269 u8 proto; 276 u8 proto;
270 u8 ttl; 277 u8 ttl;
271 u8 reserved; 278 u8 flags;
272 __be32 ipv4_src; 279 __be32 ipv4_src;
273 __be32 ipv4_dst; 280 __be32 ipv4_dst;
274}; 281};
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 332ff0fdc038..c5cebf6fb1d3 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -41,6 +41,7 @@
41#include <linux/time64.h> 41#include <linux/time64.h>
42#include <linux/types.h> 42#include <linux/types.h>
43#include <net/pkt_cls.h> 43#include <net/pkt_cls.h>
44#include <net/tcp.h>
44#include <linux/workqueue.h> 45#include <linux/workqueue.h>
45 46
46struct net_device; 47struct net_device;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 37c2ecae2a7a..b3bc8279d4fb 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -181,6 +181,26 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
181 frame->tos = flow_ip->tos; 181 frame->tos = flow_ip->tos;
182 frame->ttl = flow_ip->ttl; 182 frame->ttl = flow_ip->ttl;
183 } 183 }
184
185 if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
186 struct flow_dissector_key_tcp *tcp;
187 u32 tcp_flags;
188
189 tcp = skb_flow_dissector_target(flow->dissector,
190 FLOW_DISSECTOR_KEY_TCP, target);
191 tcp_flags = be16_to_cpu(tcp->flags);
192
193 if (tcp_flags & TCPHDR_FIN)
194 frame->flags |= NFP_FL_TCP_FLAG_FIN;
195 if (tcp_flags & TCPHDR_SYN)
196 frame->flags |= NFP_FL_TCP_FLAG_SYN;
197 if (tcp_flags & TCPHDR_RST)
198 frame->flags |= NFP_FL_TCP_FLAG_RST;
199 if (tcp_flags & TCPHDR_PSH)
200 frame->flags |= NFP_FL_TCP_FLAG_PSH;
201 if (tcp_flags & TCPHDR_URG)
202 frame->flags |= NFP_FL_TCP_FLAG_URG;
203 }
184} 204}
185 205
186static void 206static void
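nfp_flower_compile_ipv4() repacks the one-bit-per-flag wire representation (TCPHDR_* from <net/tcp.h>) into the firmware's compressed five-bit layout (NFP_FL_TCP_FLAG_* from cmsg.h). A minimal sketch of what the added branch computes for a SYN|PSH match:

	u32 tcp_flags = TCPHDR_SYN | TCPHDR_PSH;	/* 0x02 | 0x08 on the wire */
	u8 hw_flags = 0;

	if (tcp_flags & TCPHDR_SYN)
		hw_flags |= NFP_FL_TCP_FLAG_SYN;	/* BIT(1) */
	if (tcp_flags & TCPHDR_PSH)
		hw_flags |= NFP_FL_TCP_FLAG_PSH;	/* BIT(3) */
	/* hw_flags == 0x0a, stored in nfp_flower_ipv4.flags */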
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index eb5c13dea8f5..f3586c519805 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -44,11 +44,16 @@
44#include "../nfp_net.h" 44#include "../nfp_net.h"
45#include "../nfp_port.h" 45#include "../nfp_port.h"
46 46
47#define NFP_FLOWER_SUPPORTED_TCPFLAGS \
48 (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \
49 TCPHDR_PSH | TCPHDR_URG)
50
47#define NFP_FLOWER_WHITELIST_DISSECTOR \ 51#define NFP_FLOWER_WHITELIST_DISSECTOR \
48 (BIT(FLOW_DISSECTOR_KEY_CONTROL) | \ 52 (BIT(FLOW_DISSECTOR_KEY_CONTROL) | \
49 BIT(FLOW_DISSECTOR_KEY_BASIC) | \ 53 BIT(FLOW_DISSECTOR_KEY_BASIC) | \
50 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | \ 54 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | \
51 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | \ 55 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | \
56 BIT(FLOW_DISSECTOR_KEY_TCP) | \
52 BIT(FLOW_DISSECTOR_KEY_PORTS) | \ 57 BIT(FLOW_DISSECTOR_KEY_PORTS) | \
53 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \ 58 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \
54 BIT(FLOW_DISSECTOR_KEY_VLAN) | \ 59 BIT(FLOW_DISSECTOR_KEY_VLAN) | \
@@ -288,6 +293,35 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
288 } 293 }
289 } 294 }
290 295
296 if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
297 struct flow_dissector_key_tcp *tcp;
298 u32 tcp_flags;
299
300 tcp = skb_flow_dissector_target(flow->dissector,
301 FLOW_DISSECTOR_KEY_TCP,
302 flow->key);
303 tcp_flags = be16_to_cpu(tcp->flags);
304
305 if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS)
306 return -EOPNOTSUPP;
307
308 /* We only support PSH and URG flags when either
309 * FIN, SYN or RST is present as well.
310 */
311 if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) &&
312 !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST)))
313 return -EOPNOTSUPP;
314
315 /* We need to store TCP flags in the IPv4 key space, thus
316 * we need to ensure we include a IPv4 key layer if we have
317 * not done so already.
318 */
319 if (!(key_layer & NFP_FLOWER_LAYER_IPV4)) {
320 key_layer |= NFP_FLOWER_LAYER_IPV4;
321 key_size += sizeof(struct nfp_flower_ipv4);
322 }
323 }
324
291 ret_key_ls->key_layer = key_layer; 325 ret_key_ls->key_layer = key_layer;
292 ret_key_ls->key_layer_two = key_layer_two; 326 ret_key_ls->key_layer_two = key_layer_two;
293 ret_key_ls->key_size = key_size; 327 ret_key_ls->key_size = key_size;
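A few example flag combinations and their expected fate under the two checks added above (illustrative, derived from the masks in this hunk):

	/* TCPHDR_SYN               -> offloadable
	 * TCPHDR_SYN | TCPHDR_PSH  -> offloadable (PSH accompanied by SYN)
	 * TCPHDR_PSH               -> -EOPNOTSUPP (PSH without FIN/SYN/RST)
	 * TCPHDR_ACK               -> -EOPNOTSUPP (outside NFP_FLOWER_SUPPORTED_TCPFLAGS)
	 */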
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 4499a7333078..bb63c115537d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
+ * Copyright (C) 2015-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -51,12 +51,12 @@
  * The configuration BAR is 8K in size, but due to
  * THB-350, 32k needs to be reserved.
  */
 #define NFP_NET_CFG_BAR_SZ (32 * 1024)
 
 /**
  * Offset in Freelist buffer where packet starts on RX
  */
 #define NFP_NET_RX_OFFSET 32
 
 /**
  * LSO parameters
@@ -75,65 +75,65 @@
 #define NFP_NET_META_PORTID 5
 #define NFP_NET_META_CSUM 6 /* checksum complete type */
 
 #define NFP_META_PORT_ID_CTRL ~0U
 
 /**
  * Hash type pre-pended when a RSS hash was computed
  */
 #define NFP_NET_RSS_NONE 0
 #define NFP_NET_RSS_IPV4 1
 #define NFP_NET_RSS_IPV6 2
 #define NFP_NET_RSS_IPV6_EX 3
 #define NFP_NET_RSS_IPV4_TCP 4
 #define NFP_NET_RSS_IPV6_TCP 5
 #define NFP_NET_RSS_IPV6_EX_TCP 6
 #define NFP_NET_RSS_IPV4_UDP 7
 #define NFP_NET_RSS_IPV6_UDP 8
 #define NFP_NET_RSS_IPV6_EX_UDP 9
 
 /**
  * Ring counts
  * %NFP_NET_TXR_MAX: Maximum number of TX rings
  * %NFP_NET_RXR_MAX: Maximum number of RX rings
  */
 #define NFP_NET_TXR_MAX 64
 #define NFP_NET_RXR_MAX 64
 
 /**
  * Read/Write config words (0x0000 - 0x002c)
  * %NFP_NET_CFG_CTRL: Global control
  * %NFP_NET_CFG_UPDATE: Indicate which fields are updated
  * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
  * %NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings
  * %NFP_NET_CFG_MTU: Set MTU size
  * %NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU)
  * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions
  * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes
  * %NFP_NET_CFG_MACADDR: MAC address
  *
  * TODO:
  * - define Error details in UPDATE
  */
 #define NFP_NET_CFG_CTRL 0x0000
 #define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */
 #define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */
 #define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */
 #define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */
 #define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */
 #define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */
 #define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */
 #define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */
 #define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */
 #define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */
 #define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */
 #define NFP_NET_CFG_CTRL_CTAG_FILTER (0x1 << 11) /* VLAN CTAG filtering */
 #define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */
 #define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS (version 1) */
 #define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */
 #define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */
 #define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */
 #define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/
 #define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */
 #define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
 #define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */
 #define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */
@@ -152,35 +152,35 @@
 #define NFP_NET_CFG_CTRL_CHAIN_META (NFP_NET_CFG_CTRL_RSS2 | \
 				     NFP_NET_CFG_CTRL_CSUM_COMPLETE)
 
 #define NFP_NET_CFG_UPDATE 0x0004
 #define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */
 #define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */
 #define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */
 #define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */
 #define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */
 #define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */
 #define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */
 #define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */
 #define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */
 #define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */
 #define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */
 #define NFP_NET_CFG_UPDATE_MACADDR (0x1 << 11) /* MAC address change */
 #define NFP_NET_CFG_UPDATE_MBOX (0x1 << 12) /* Mailbox update */
 #define NFP_NET_CFG_UPDATE_VF (0x1 << 13) /* VF settings change */
 #define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */
 #define NFP_NET_CFG_TXRS_ENABLE 0x0008
 #define NFP_NET_CFG_RXRS_ENABLE 0x0010
 #define NFP_NET_CFG_MTU 0x0018
 #define NFP_NET_CFG_FLBUFSZ 0x001c
 #define NFP_NET_CFG_EXN 0x001f
 #define NFP_NET_CFG_LSC 0x0020
 #define NFP_NET_CFG_MACADDR 0x0024
 
 /**
  * Read-only words (0x0030 - 0x0050):
  * %NFP_NET_CFG_VERSION: Firmware version number
  * %NFP_NET_CFG_STS: Status
  * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL)
  * %NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings
  * %NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings
  * %NFP_NET_CFG_MAX_MTU: Maximum support MTU
@@ -190,37 +190,37 @@
  * TODO:
  * - define more STS bits
  */
 #define NFP_NET_CFG_VERSION 0x0030
 #define NFP_NET_CFG_VERSION_RESERVED_MASK (0xff << 24)
 #define NFP_NET_CFG_VERSION_CLASS_MASK (0xff << 16)
 #define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16)
 #define NFP_NET_CFG_VERSION_CLASS_GENERIC 0
 #define NFP_NET_CFG_VERSION_MAJOR_MASK (0xff << 8)
 #define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8)
 #define NFP_NET_CFG_VERSION_MINOR_MASK (0xff << 0)
 #define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0)
 #define NFP_NET_CFG_STS 0x0034
 #define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */
 /* Link rate */
 #define NFP_NET_CFG_STS_LINK_RATE_SHIFT 1
 #define NFP_NET_CFG_STS_LINK_RATE_MASK 0xF
 #define NFP_NET_CFG_STS_LINK_RATE \
 	(NFP_NET_CFG_STS_LINK_RATE_MASK << NFP_NET_CFG_STS_LINK_RATE_SHIFT)
 #define NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED 0
 #define NFP_NET_CFG_STS_LINK_RATE_UNKNOWN 1
 #define NFP_NET_CFG_STS_LINK_RATE_1G 2
 #define NFP_NET_CFG_STS_LINK_RATE_10G 3
 #define NFP_NET_CFG_STS_LINK_RATE_25G 4
 #define NFP_NET_CFG_STS_LINK_RATE_40G 5
 #define NFP_NET_CFG_STS_LINK_RATE_50G 6
 #define NFP_NET_CFG_STS_LINK_RATE_100G 7
 #define NFP_NET_CFG_CAP 0x0038
 #define NFP_NET_CFG_MAX_TXRINGS 0x003c
 #define NFP_NET_CFG_MAX_RXRINGS 0x0040
 #define NFP_NET_CFG_MAX_MTU 0x0044
 /* Next two words are being used by VFs for solving THB350 issue */
 #define NFP_NET_CFG_START_TXQ 0x0048
 #define NFP_NET_CFG_START_RXQ 0x004c
 
 /**
  * Prepend configuration
@@ -280,8 +280,8 @@
 /**
  * 40B reserved for future use (0x0098 - 0x00c0)
  */
 #define NFP_NET_CFG_RESERVED 0x0098
 #define NFP_NET_CFG_RESERVED_SZ 0x0028
 
 /**
  * RSS configuration (0x0100 - 0x01ac):
@@ -290,26 +290,26 @@
  * %NFP_NET_CFG_RSS_KEY: RSS "secret" key
  * %NFP_NET_CFG_RSS_ITBL: RSS indirection table
  */
 #define NFP_NET_CFG_RSS_BASE 0x0100
 #define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE
 #define NFP_NET_CFG_RSS_MASK (0x7f)
 #define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f)
 #define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */
 #define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */
 #define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */
 #define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */
 #define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */
 #define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */
 #define NFP_NET_CFG_RSS_HFUNC 0xff000000
 #define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */
 #define NFP_NET_CFG_RSS_XOR (1 << 25) /* Use XOR as hash */
 #define NFP_NET_CFG_RSS_CRC32 (1 << 26) /* Use CRC32 as hash */
 #define NFP_NET_CFG_RSS_HFUNCS 3
 #define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4)
 #define NFP_NET_CFG_RSS_KEY_SZ 0x28
 #define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \
 			      NFP_NET_CFG_RSS_KEY_SZ)
 #define NFP_NET_CFG_RSS_ITBL_SZ 0x80
 
 /**
  * TX ring configuration (0x200 - 0x800)
@@ -321,13 +321,13 @@
  * %NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries)
  * %NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet
  */
 #define NFP_NET_CFG_TXR_BASE 0x0200
 #define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8))
 #define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \
 				     ((_x) * 0x8))
 #define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x))
 #define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x))
 #define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x))
 #define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \
 				     ((_x) * 0x4))
 
@@ -340,11 +340,11 @@
  * %NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries)
  * %NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries)
  */
 #define NFP_NET_CFG_RXR_BASE 0x0800
 #define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8))
 #define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x))
 #define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x))
 #define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x))
 #define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \
 				     ((_x) * 0x4))
 
@@ -358,36 +358,36 @@
  * the MSI-X entry and the host driver must clear the register to
  * re-enable the interrupt.
  */
 #define NFP_NET_CFG_ICR_BASE 0x0c00
 #define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x))
 #define NFP_NET_CFG_ICR_UNMASKED 0x0
 #define NFP_NET_CFG_ICR_RXTX 0x1
 #define NFP_NET_CFG_ICR_LSC 0x2
 
 /**
  * General device stats (0x0d00 - 0x0d90)
  * all counters are 64bit.
  */
 #define NFP_NET_CFG_STATS_BASE 0x0d00
 #define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00)
 #define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08)
 #define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10)
 #define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18)
 #define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20)
 #define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28)
 #define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30)
 #define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38)
 #define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40)
 
 #define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48)
 #define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50)
 #define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58)
 #define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60)
 #define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68)
 #define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70)
 #define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78)
 #define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80)
 #define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88)
 
 #define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90)
 #define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98)
@@ -404,11 +404,11 @@
  * %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count)
  * %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count)
  */
 #define NFP_NET_CFG_TXR_STATS_BASE 0x1000
 #define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \
 				   ((_x) * 0x10))
 #define NFP_NET_CFG_RXR_STATS_BASE 0x1400
 #define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \
 				   ((_x) * 0x10))
 
 /**
@@ -444,7 +444,7 @@
  * %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV
  * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
  * %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV
  * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments
  * %NFP_NET_CFG_TLV_VALUE: Offset of value with the TLV
  *
  * List of simple TLV structures, first one starts at %NFP_NET_CFG_TLV_BASE.
@@ -457,12 +457,12 @@
  * Note that the 4 byte TLV header is not counted in %NFP_NET_CFG_TLV_LENGTH.
  */
 #define NFP_NET_CFG_TLV_TYPE 0x00
 #define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000
 #define NFP_NET_CFG_TLV_LENGTH 0x02
 #define NFP_NET_CFG_TLV_LENGTH_INC 4
 #define NFP_NET_CFG_TLV_VALUE 0x04
 
 #define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000
 #define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000
 #define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff
 
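For orientation, the VERSION word above packs reserved/class/major/minor into one 32-bit register, one byte each. A small sketch of how a reader of the config BAR would decompose it; the shifts and masks mirror the defines, while the helper itself is illustrative:

#include <stdint.h>
#include <stdio.h>

static void decode_nfp_version(uint32_t ver)
{
	unsigned int class = (ver >> 16) & 0xff;	/* ..._CLASS_MASK */
	unsigned int major = (ver >> 8) & 0xff;		/* ..._MAJOR_MASK */
	unsigned int minor = ver & 0xff;		/* ..._MINOR_MASK */

	printf("class %u, ABI %u.%u\n", class, major, minor);
}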
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 7e7704daf5f1..c4949183eef3 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -43,12 +43,6 @@
 
 /* Local Definitions and Declarations */
 
-struct rmnet_walk_data {
-	struct net_device *real_dev;
-	struct list_head *head;
-	struct rmnet_port *port;
-};
-
 static int rmnet_is_real_dev_registered(const struct net_device *real_dev)
 {
 	return rcu_access_pointer(real_dev->rx_handler) == rmnet_rx_handler;
@@ -112,17 +106,14 @@ static int rmnet_register_real_device(struct net_device *real_dev)
 static void rmnet_unregister_bridge(struct net_device *dev,
 				    struct rmnet_port *port)
 {
-	struct net_device *rmnet_dev, *bridge_dev;
 	struct rmnet_port *bridge_port;
+	struct net_device *bridge_dev;
 
 	if (port->rmnet_mode != RMNET_EPMODE_BRIDGE)
 		return;
 
 	/* bridge slave handling */
 	if (!port->nr_rmnet_devs) {
-		rmnet_dev = netdev_master_upper_dev_get_rcu(dev);
-		netdev_upper_dev_unlink(dev, rmnet_dev);
-
 		bridge_dev = port->bridge_ep;
 
 		bridge_port = rmnet_get_port_rtnl(bridge_dev);
@@ -132,9 +123,6 @@ static void rmnet_unregister_bridge(struct net_device *dev,
 		bridge_dev = port->bridge_ep;
 
 		bridge_port = rmnet_get_port_rtnl(bridge_dev);
-		rmnet_dev = netdev_master_upper_dev_get_rcu(bridge_dev);
-		netdev_upper_dev_unlink(bridge_dev, rmnet_dev);
-
 		rmnet_unregister_real_device(bridge_dev, bridge_port);
 	}
 }
@@ -173,10 +161,6 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 	if (err)
 		goto err1;
 
-	err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL, extack);
-	if (err)
-		goto err2;
-
 	port->rmnet_mode = mode;
 
 	hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
@@ -193,8 +177,6 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 
 	return 0;
 
-err2:
-	rmnet_vnd_dellink(mux_id, port, ep);
 err1:
 	rmnet_unregister_real_device(real_dev, port);
 err0:
@@ -204,14 +186,13 @@ err0:
 
 static void rmnet_dellink(struct net_device *dev, struct list_head *head)
 {
+	struct rmnet_priv *priv = netdev_priv(dev);
 	struct net_device *real_dev;
 	struct rmnet_endpoint *ep;
 	struct rmnet_port *port;
 	u8 mux_id;
 
-	rcu_read_lock();
-	real_dev = netdev_master_upper_dev_get_rcu(dev);
-	rcu_read_unlock();
+	real_dev = priv->real_dev;
 
 	if (!real_dev || !rmnet_is_real_dev_registered(real_dev))
 		return;
@@ -219,7 +200,6 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head)
 	port = rmnet_get_port_rtnl(real_dev);
 
 	mux_id = rmnet_vnd_get_mux(dev);
-	netdev_upper_dev_unlink(dev, real_dev);
 
 	ep = rmnet_get_endpoint(port, mux_id);
 	if (ep) {
@@ -233,30 +213,13 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head)
 	unregister_netdevice_queue(dev, head);
 }
 
-static int rmnet_dev_walk_unreg(struct net_device *rmnet_dev, void *data)
-{
-	struct rmnet_walk_data *d = data;
-	struct rmnet_endpoint *ep;
-	u8 mux_id;
-
-	mux_id = rmnet_vnd_get_mux(rmnet_dev);
-	ep = rmnet_get_endpoint(d->port, mux_id);
-	if (ep) {
-		hlist_del_init_rcu(&ep->hlnode);
-		rmnet_vnd_dellink(mux_id, d->port, ep);
-		kfree(ep);
-	}
-	netdev_upper_dev_unlink(rmnet_dev, d->real_dev);
-	unregister_netdevice_queue(rmnet_dev, d->head);
-
-	return 0;
-}
-
 static void rmnet_force_unassociate_device(struct net_device *dev)
 {
 	struct net_device *real_dev = dev;
-	struct rmnet_walk_data d;
+	struct hlist_node *tmp_ep;
+	struct rmnet_endpoint *ep;
 	struct rmnet_port *port;
+	unsigned long bkt_ep;
 	LIST_HEAD(list);
 
 	if (!rmnet_is_real_dev_registered(real_dev))
@@ -264,16 +227,19 @@ static void rmnet_force_unassociate_device(struct net_device *dev)
 
 	ASSERT_RTNL();
 
-	d.real_dev = real_dev;
-	d.head = &list;
-
 	port = rmnet_get_port_rtnl(dev);
-	d.port = port;
 
 	rcu_read_lock();
 	rmnet_unregister_bridge(dev, port);
 
-	netdev_walk_all_lower_dev_rcu(real_dev, rmnet_dev_walk_unreg, &d);
+	hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) {
+		unregister_netdevice_queue(ep->egress_dev, &list);
+		rmnet_vnd_dellink(ep->mux_id, port, ep);
+
+		hlist_del_init_rcu(&ep->hlnode);
+		kfree(ep);
+	}
+
 	rcu_read_unlock();
 	unregister_netdevice_many(&list);
 
@@ -422,11 +388,6 @@ int rmnet_add_bridge(struct net_device *rmnet_dev,
 	if (err)
 		return -EBUSY;
 
-	err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL,
-					   extack);
-	if (err)
-		return -EINVAL;
-
 	slave_port = rmnet_get_port(slave_dev);
 	slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE;
 	slave_port->bridge_ep = real_dev;
@@ -449,7 +410,6 @@ int rmnet_del_bridge(struct net_device *rmnet_dev,
 	port->rmnet_mode = RMNET_EPMODE_VND;
 	port->bridge_ep = NULL;
 
-	netdev_upper_dev_unlink(slave_dev, rmnet_dev);
 	slave_port = rmnet_get_port(slave_dev);
 	rmnet_unregister_real_device(slave_dev, slave_port);
 
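The rewritten rmnet_force_unassociate_device() walks the driver's own mux-id hashtable instead of the netdev upper/lower links. The key idiom is the _safe iterator, which caches the next node so entries can be unhashed and freed mid-walk. A minimal sketch of that idiom with stand-in types (not the rmnet structures):

#include <linux/hashtable.h>
#include <linux/slab.h>

struct demo_ep {
	struct hlist_node hlnode;
	u8 mux_id;
};

static DEFINE_HASHTABLE(demo_table, 5);

static void demo_teardown(void)
{
	struct hlist_node *tmp;
	struct demo_ep *ep;
	unsigned long bkt;

	hash_for_each_safe(demo_table, bkt, tmp, ep, hlnode) {
		hash_del(&ep->hlnode);	/* safe: 'tmp' keeps the walk valid */
		kfree(ep);
	}
}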
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
index 6bc328fb88e1..b0dbca070c00 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
@@ -38,6 +38,11 @@ static u8 rmnet_map_do_flow_control(struct sk_buff *skb,
 	}
 
 	ep = rmnet_get_endpoint(port, mux_id);
+	if (!ep) {
+		kfree_skb(skb);
+		return RX_HANDLER_CONSUMED;
+	}
+
 	vnd = ep->egress_dev;
 
 	ip_family = cmd->flow_control.ip_family;
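The shape of the fix above, stated generally: a lookup that can fail must consume the skb on the failure path instead of falling through to a NULL dereference. A hedged sketch of the same control flow (the function name is hypothetical; rmnet_get_endpoint and the rmnet types are the driver's own):

static rx_handler_result_t demo_handle_flow_ctrl(struct sk_buff *skb,
						 struct rmnet_port *port,
						 u8 mux_id)
{
	struct rmnet_endpoint *ep = rmnet_get_endpoint(port, mux_id);

	if (!ep) {
		kfree_skb(skb);		/* drop: don't leak, don't deref NULL */
		return RX_HANDLER_CONSUMED;
	}

	/* ... continue with ep->egress_dev as before ... */
	return RX_HANDLER_CONSUMED;
}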
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 570a227acdd8..346d310914df 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -121,7 +121,7 @@ static void rmnet_get_stats64(struct net_device *dev,
 	memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats));
 
 	for_each_possible_cpu(cpu) {
-		pcpu_ptr = this_cpu_ptr(priv->pcpu_stats);
+		pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu);
 
 		do {
 			start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp);
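This one-line fix matters because this_cpu_ptr() always resolves to the slot of the CPU currently executing, so using it inside for_each_possible_cpu() would add the same counters once per CPU. per_cpu_ptr(ptr, cpu) indexes the slot of the CPU being iterated. A minimal sketch with a stand-in stats struct (the real driver additionally brackets the reads with u64_stats_fetch_begin_irq()/retry for 32-bit safety, omitted here):

#include <linux/percpu.h>
#include <linux/types.h>

struct demo_stats {
	u64 rx_pkts;
};

static u64 demo_sum_rx(struct demo_stats __percpu *stats)
{
	u64 total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu_ptr(stats, cpu)->rx_pkts; /* that CPU's slot */

	return total;
}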
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 0bf7d1759250..c16b97a56d9f 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -736,9 +736,8 @@ struct ring_info {
 };
 
 enum features {
-	RTL_FEATURE_WOL		= (1 << 0),
-	RTL_FEATURE_MSI		= (1 << 1),
-	RTL_FEATURE_GMII	= (1 << 2),
+	RTL_FEATURE_MSI		= (1 << 0),
+	RTL_FEATURE_GMII	= (1 << 1),
 };
 
 struct rtl8169_counters {
@@ -1859,10 +1858,6 @@ static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 
 	rtl_lock_work(tp);
 
-	if (wol->wolopts)
-		tp->features |= RTL_FEATURE_WOL;
-	else
-		tp->features &= ~RTL_FEATURE_WOL;
 	if (pm_runtime_active(d))
 		__rtl8169_set_wol(tp, wol->wolopts);
 	else
@@ -3805,8 +3800,6 @@ static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp)
 	rtl_writephy(tp, 0x1f, 0x0005);
 	rtl_w0w1_phy(tp, 0x01, 0x0100, 0x0000);
 	rtl_writephy(tp, 0x1f, 0x0000);
-	/* soft-reset phy */
-	rtl_writephy(tp, MII_BMCR, BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART);
 
 	/* Broken BIOS workaround: feed GigaMAC registers with MAC address. */
 	rtl_rar_exgmac_set(tp, tp->dev->dev_addr);
@@ -8521,36 +8514,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	RTL_W8(Cfg9346, Cfg9346_Unlock);
 	RTL_W8(Config1, RTL_R8(Config1) | PMEnable);
 	RTL_W8(Config5, RTL_R8(Config5) & (BWF | MWF | UWF | LanWake | PMEStatus));
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_34:
-	case RTL_GIGA_MAC_VER_35:
-	case RTL_GIGA_MAC_VER_36:
-	case RTL_GIGA_MAC_VER_37:
-	case RTL_GIGA_MAC_VER_38:
-	case RTL_GIGA_MAC_VER_40:
-	case RTL_GIGA_MAC_VER_41:
-	case RTL_GIGA_MAC_VER_42:
-	case RTL_GIGA_MAC_VER_43:
-	case RTL_GIGA_MAC_VER_44:
-	case RTL_GIGA_MAC_VER_45:
-	case RTL_GIGA_MAC_VER_46:
-	case RTL_GIGA_MAC_VER_47:
-	case RTL_GIGA_MAC_VER_48:
-	case RTL_GIGA_MAC_VER_49:
-	case RTL_GIGA_MAC_VER_50:
-	case RTL_GIGA_MAC_VER_51:
-		if (rtl_eri_read(tp, 0xdc, ERIAR_EXGMAC) & MagicPacket_v2)
-			tp->features |= RTL_FEATURE_WOL;
-		if ((RTL_R8(Config3) & LinkUp) != 0)
-			tp->features |= RTL_FEATURE_WOL;
-		break;
-	default:
-		if ((RTL_R8(Config3) & (LinkUp | MagicPacket)) != 0)
-			tp->features |= RTL_FEATURE_WOL;
-		break;
-	}
-	if ((RTL_R8(Config5) & (UWF | BWF | MWF)) != 0)
-		tp->features |= RTL_FEATURE_WOL;
 	tp->features |= rtl_try_msi(tp, cfg);
 	RTL_W8(Cfg9346, Cfg9346_Lock);
 
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index 96a27b00c90e..b81f4faf7b10 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -1018,6 +1018,7 @@ struct ravb_private {
 	u32 dirty_rx[NUM_RX_QUEUE];	/* Producer ring indices */
 	u32 cur_tx[NUM_TX_QUEUE];
 	u32 dirty_tx[NUM_TX_QUEUE];
+	u32 rx_buf_sz;			/* Based on MTU+slack. */
 	struct napi_struct napi[NUM_RX_QUEUE];
 	struct work_struct work;
 	/* MII transceiver section. */
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index c87f57ca4437..54a6265da7a0 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -238,7 +238,7 @@ static void ravb_ring_free(struct net_device *ndev, int q)
 					 le32_to_cpu(desc->dptr)))
 				dma_unmap_single(ndev->dev.parent,
 						 le32_to_cpu(desc->dptr),
-						 PKT_BUF_SZ,
+						 priv->rx_buf_sz,
 						 DMA_FROM_DEVICE);
 		}
 		ring_size = sizeof(struct ravb_ex_rx_desc) *
@@ -300,9 +300,9 @@ static void ravb_ring_format(struct net_device *ndev, int q)
 	for (i = 0; i < priv->num_rx_ring[q]; i++) {
 		/* RX descriptor */
 		rx_desc = &priv->rx_ring[q][i];
-		rx_desc->ds_cc = cpu_to_le16(PKT_BUF_SZ);
+		rx_desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
 		dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data,
-					  PKT_BUF_SZ,
+					  priv->rx_buf_sz,
 					  DMA_FROM_DEVICE);
 		/* We just set the data size to 0 for a failed mapping which
 		 * should prevent DMA from happening...
@@ -346,6 +346,10 @@ static int ravb_ring_init(struct net_device *ndev, int q)
 	int ring_size;
 	int i;
 
+	/* +16 gets room from the status from the card. */
+	priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
+		ETH_HLEN + VLAN_HLEN;
+
 	/* Allocate RX and TX skb rings */
 	priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
 				  sizeof(*priv->rx_skb[q]), GFP_KERNEL);
@@ -355,7 +359,7 @@ static int ravb_ring_init(struct net_device *ndev, int q)
 		goto error;
 
 	for (i = 0; i < priv->num_rx_ring[q]; i++) {
-		skb = netdev_alloc_skb(ndev, PKT_BUF_SZ + RAVB_ALIGN - 1);
+		skb = netdev_alloc_skb(ndev, priv->rx_buf_sz + RAVB_ALIGN - 1);
 		if (!skb)
 			goto error;
 		ravb_set_buffer_align(skb);
@@ -586,7 +590,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
 		skb = priv->rx_skb[q][entry];
 		priv->rx_skb[q][entry] = NULL;
 		dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr),
-				 PKT_BUF_SZ,
+				 priv->rx_buf_sz,
 				 DMA_FROM_DEVICE);
 		get_ts &= (q == RAVB_NC) ?
 			RAVB_RXTSTAMP_TYPE_V2_L2_EVENT :
@@ -619,11 +623,12 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
 	for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) {
 		entry = priv->dirty_rx[q] % priv->num_rx_ring[q];
 		desc = &priv->rx_ring[q][entry];
-		desc->ds_cc = cpu_to_le16(PKT_BUF_SZ);
+		desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
 
 		if (!priv->rx_skb[q][entry]) {
 			skb = netdev_alloc_skb(ndev,
-					       PKT_BUF_SZ + RAVB_ALIGN - 1);
+					       priv->rx_buf_sz +
+					       RAVB_ALIGN - 1);
 			if (!skb)
 				break;	/* Better luck next round. */
 			ravb_set_buffer_align(skb);
@@ -1854,6 +1859,17 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
 		return phy_mii_ioctl(phydev, req, cmd);
 }
 
+static int ravb_change_mtu(struct net_device *ndev, int new_mtu)
+{
+	if (netif_running(ndev))
+		return -EBUSY;
+
+	ndev->mtu = new_mtu;
+	netdev_update_features(ndev);
+
+	return 0;
+}
+
 static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
@@ -1895,6 +1911,7 @@ static const struct net_device_ops ravb_netdev_ops = {
 	.ndo_set_rx_mode	= ravb_set_rx_mode,
 	.ndo_tx_timeout		= ravb_tx_timeout,
 	.ndo_do_ioctl		= ravb_do_ioctl,
+	.ndo_change_mtu		= ravb_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_set_features	= ravb_set_features,
@@ -2117,6 +2134,9 @@ static int ravb_probe(struct platform_device *pdev)
 		goto out_release;
 	}
 
+	ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
+	ndev->min_mtu = ETH_MIN_MTU;
+
 	/* Set function */
 	ndev->netdev_ops = &ravb_netdev_ops;
 	ndev->ethtool_ops = &ravb_ethtool_ops;
@@ -2255,9 +2275,6 @@ static int ravb_wol_setup(struct net_device *ndev)
 	/* Enable MagicPacket */
 	ravb_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE);
 
-	/* Increased clock usage so device won't be suspended */
-	clk_enable(priv->clk);
-
 	return enable_irq_wake(priv->emac_irq);
 }
 
@@ -2276,9 +2293,6 @@ static int ravb_wol_restore(struct net_device *ndev)
 	if (ret < 0)
 		return ret;
 
-	/* Restore clock usage count */
-	clk_disable(priv->clk);
-
 	return disable_irq_wake(priv->emac_irq);
 }
 
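Taken together, the ravb hunks wire MTU support through three points: min_mtu/max_mtu bounds set at probe time, a .ndo_change_mtu that refuses changes while the rings are live, and an RX buffer size recomputed from the current MTU whenever the rings are initialized. A condensed sketch of the two helpers, with the legacy buffer floor passed in rather than hard-coded (PKT_BUF_SZ is the driver's own constant, not reproduced here):

#include <linux/etherdevice.h>
#include <linux/if_vlan.h>

/* mirror of the hunk: legacy buffer floor for small MTUs, plus slack */
static u32 demo_rx_buf_sz(const struct net_device *ndev, u32 pkt_buf_sz)
{
	return (ndev->mtu <= 1492 ? pkt_buf_sz : ndev->mtu) +
	       ETH_HLEN + VLAN_HLEN;
}

static int demo_change_mtu(struct net_device *ndev, int new_mtu)
{
	if (netif_running(ndev))
		return -EBUSY;	/* rings are sized for the old MTU */

	ndev->mtu = new_mtu;
	netdev_update_features(ndev);
	return 0;
}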
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index a197e11f3a56..d7d5a6d15219 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -40,7 +40,6 @@
 #include <linux/slab.h>
 #include <linux/ethtool.h>
 #include <linux/if_vlan.h>
-#include <linux/clk.h>
 #include <linux/sh_eth.h>
 #include <linux/of_mdio.h>
 
@@ -962,20 +961,16 @@ static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
 
 static int sh_eth_check_reset(struct net_device *ndev)
 {
-	int ret = 0;
-	int cnt = 100;
+	int cnt;
 
-	while (cnt > 0) {
+	for (cnt = 100; cnt > 0; cnt--) {
 		if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER))
-			break;
+			return 0;
 		mdelay(1);
-		cnt--;
 	}
-	if (cnt <= 0) {
-		netdev_err(ndev, "Device reset failed\n");
-		ret = -ETIMEDOUT;
-	}
-	return ret;
+
+	netdev_err(ndev, "Device reset failed\n");
+	return -ETIMEDOUT;
 }
 
 static int sh_eth_reset(struct net_device *ndev)
@@ -2304,7 +2299,7 @@ static void sh_eth_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 	wol->supported = 0;
 	wol->wolopts = 0;
 
-	if (mdp->cd->magic && mdp->clk) {
+	if (mdp->cd->magic) {
 		wol->supported = WAKE_MAGIC;
 		wol->wolopts = mdp->wol_enabled ? WAKE_MAGIC : 0;
 	}
@@ -2314,7 +2309,7 @@ static int sh_eth_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 
-	if (!mdp->cd->magic || !mdp->clk || wol->wolopts & ~WAKE_MAGIC)
+	if (!mdp->cd->magic || wol->wolopts & ~WAKE_MAGIC)
 		return -EOPNOTSUPP;
 
 	mdp->wol_enabled = !!(wol->wolopts & WAKE_MAGIC);
@@ -3153,11 +3148,6 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 		goto out_release;
 	}
 
-	/* Get clock, if not found that's OK but Wake-On-Lan is unavailable */
-	mdp->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(mdp->clk))
-		mdp->clk = NULL;
-
 	ndev->base_addr = res->start;
 
 	spin_lock_init(&mdp->lock);
@@ -3278,7 +3268,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 	if (ret)
 		goto out_napi_del;
 
-	if (mdp->cd->magic && mdp->clk)
+	if (mdp->cd->magic)
 		device_set_wakeup_capable(&pdev->dev, 1);
 
 	/* print device information */
@@ -3331,9 +3321,6 @@ static int sh_eth_wol_setup(struct net_device *ndev)
 	/* Enable MagicPacket */
 	sh_eth_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE);
 
-	/* Increased clock usage so device won't be suspended */
-	clk_enable(mdp->clk);
-
 	return enable_irq_wake(ndev->irq);
 }
 
@@ -3359,9 +3346,6 @@ static int sh_eth_wol_restore(struct net_device *ndev)
 	if (ret < 0)
 		return ret;
 
-	/* Restore clock usage count */
-	clk_disable(mdp->clk);
-
 	return disable_irq_wake(ndev->irq);
 }
 
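The sh_eth_check_reset() rewrite is the standard bounded-poll idiom: return success from inside the loop the moment the condition clears, and fall through to the timeout path once the budget is spent, which removes both the ret variable and the post-loop cnt check. A generic sketch with a stand-in predicate:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

static int demo_poll_until_clear(bool (*still_busy)(void))
{
	int cnt;

	for (cnt = 100; cnt > 0; cnt--) {
		if (!still_busy())
			return 0;	/* done: no flag variable needed */
		mdelay(1);
	}

	return -ETIMEDOUT;		/* budget exhausted */
}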
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 5270d26f0bc6..2d5d4aea3bcb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -48,26 +48,18 @@
48#define MUX_CLK_NUM_PARENTS 2 48#define MUX_CLK_NUM_PARENTS 2
49 49
50struct meson8b_dwmac { 50struct meson8b_dwmac {
51 struct platform_device *pdev; 51 struct device *dev;
52
53 void __iomem *regs; 52 void __iomem *regs;
54
55 phy_interface_t phy_mode; 53 phy_interface_t phy_mode;
54 struct clk *rgmii_tx_clk;
55 u32 tx_delay_ns;
56};
56 57
58struct meson8b_dwmac_clk_configs {
57 struct clk_mux m250_mux; 59 struct clk_mux m250_mux;
58 struct clk *m250_mux_clk;
59 struct clk *m250_mux_parent[MUX_CLK_NUM_PARENTS];
60
61 struct clk_divider m250_div; 60 struct clk_divider m250_div;
62 struct clk *m250_div_clk;
63
64 struct clk_fixed_factor fixed_div2; 61 struct clk_fixed_factor fixed_div2;
65 struct clk *fixed_div2_clk;
66
67 struct clk_gate rgmii_tx_en; 62 struct clk_gate rgmii_tx_en;
68 struct clk *rgmii_tx_en_clk;
69
70 u32 tx_delay_ns;
71}; 63};
72 64
73static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg, 65static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg,
@@ -82,106 +74,99 @@ static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg,
82 writel(data, dwmac->regs + reg); 74 writel(data, dwmac->regs + reg);
83} 75}
84 76
85static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) 77static struct clk *meson8b_dwmac_register_clk(struct meson8b_dwmac *dwmac,
78 const char *name_suffix,
79 const char **parent_names,
80 int num_parents,
81 const struct clk_ops *ops,
82 struct clk_hw *hw)
86{ 83{
87 struct clk_init_data init; 84 struct clk_init_data init;
88 int i, ret;
89 struct device *dev = &dwmac->pdev->dev;
90 char clk_name[32]; 85 char clk_name[32];
91 const char *clk_div_parents[1]; 86
92 const char *mux_parent_names[MUX_CLK_NUM_PARENTS]; 87 snprintf(clk_name, sizeof(clk_name), "%s#%s", dev_name(dwmac->dev),
88 name_suffix);
89
90 init.name = clk_name;
91 init.ops = ops;
92 init.flags = CLK_SET_RATE_PARENT;
93 init.parent_names = parent_names;
94 init.num_parents = num_parents;
95
96 hw->init = &init;
97
98 return devm_clk_register(dwmac->dev, hw);
99}
100
101static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
102{
103 int i, ret;
104 struct clk *clk;
105 struct device *dev = dwmac->dev;
106 const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
107 struct meson8b_dwmac_clk_configs *clk_configs;
108
109 clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL);
110 if (!clk_configs)
111 return -ENOMEM;
93 112
94 /* get the mux parents from DT */ 113 /* get the mux parents from DT */
95 for (i = 0; i < MUX_CLK_NUM_PARENTS; i++) { 114 for (i = 0; i < MUX_CLK_NUM_PARENTS; i++) {
96 char name[16]; 115 char name[16];
97 116
98 snprintf(name, sizeof(name), "clkin%d", i); 117 snprintf(name, sizeof(name), "clkin%d", i);
99 dwmac->m250_mux_parent[i] = devm_clk_get(dev, name); 118 clk = devm_clk_get(dev, name);
100 if (IS_ERR(dwmac->m250_mux_parent[i])) { 119 if (IS_ERR(clk)) {
101 ret = PTR_ERR(dwmac->m250_mux_parent[i]); 120 ret = PTR_ERR(clk);
102 if (ret != -EPROBE_DEFER) 121 if (ret != -EPROBE_DEFER)
103 dev_err(dev, "Missing clock %s\n", name); 122 dev_err(dev, "Missing clock %s\n", name);
104 return ret; 123 return ret;
105 } 124 }
106 125
107 mux_parent_names[i] = 126 mux_parent_names[i] = __clk_get_name(clk);
108 __clk_get_name(dwmac->m250_mux_parent[i]);
109 } 127 }
110 128
111 /* create the m250_mux */ 129 clk_configs->m250_mux.reg = dwmac->regs + PRG_ETH0;
112 snprintf(clk_name, sizeof(clk_name), "%s#m250_sel", dev_name(dev)); 130 clk_configs->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT;
113 init.name = clk_name; 131 clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK;
114 init.ops = &clk_mux_ops; 132 clk = meson8b_dwmac_register_clk(dwmac, "m250_sel", mux_parent_names,
115 init.flags = CLK_SET_RATE_PARENT; 133 MUX_CLK_NUM_PARENTS, &clk_mux_ops,
116 init.parent_names = mux_parent_names; 134 &clk_configs->m250_mux.hw);
117 init.num_parents = MUX_CLK_NUM_PARENTS; 135 if (WARN_ON(IS_ERR(clk)))
118 136 return PTR_ERR(clk);
119 dwmac->m250_mux.reg = dwmac->regs + PRG_ETH0; 137
120 dwmac->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT; 138 parent_name = __clk_get_name(clk);
121 dwmac->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK; 139 clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0;
122 dwmac->m250_mux.flags = 0; 140 clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
123 dwmac->m250_mux.table = NULL; 141 clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
124 dwmac->m250_mux.hw.init = &init; 142 clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED |
125
126 dwmac->m250_mux_clk = devm_clk_register(dev, &dwmac->m250_mux.hw);
127 if (WARN_ON(IS_ERR(dwmac->m250_mux_clk)))
128 return PTR_ERR(dwmac->m250_mux_clk);
129
130 /* create the m250_div */
131 snprintf(clk_name, sizeof(clk_name), "%s#m250_div", dev_name(dev));
132 init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
133 init.ops = &clk_divider_ops;
134 init.flags = CLK_SET_RATE_PARENT;
135 clk_div_parents[0] = __clk_get_name(dwmac->m250_mux_clk);
136 init.parent_names = clk_div_parents;
137 init.num_parents = ARRAY_SIZE(clk_div_parents);
138
139 dwmac->m250_div.reg = dwmac->regs + PRG_ETH0;
140 dwmac->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
141 dwmac->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
142 dwmac->m250_div.hw.init = &init;
143 dwmac->m250_div.flags = CLK_DIVIDER_ONE_BASED |
144 CLK_DIVIDER_ALLOW_ZERO | 143 CLK_DIVIDER_ALLOW_ZERO |
145 CLK_DIVIDER_ROUND_CLOSEST; 144 CLK_DIVIDER_ROUND_CLOSEST;
146 145 clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1,
147 dwmac->m250_div_clk = devm_clk_register(dev, &dwmac->m250_div.hw); 146 &clk_divider_ops,
148 if (WARN_ON(IS_ERR(dwmac->m250_div_clk))) 147 &clk_configs->m250_div.hw);
149 return PTR_ERR(dwmac->m250_div_clk); 148 if (WARN_ON(IS_ERR(clk)))
150 149 return PTR_ERR(clk);
151 /* create the fixed_div2 */ 150
152 snprintf(clk_name, sizeof(clk_name), "%s#fixed_div2", dev_name(dev)); 151 parent_name = __clk_get_name(clk);
153 init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL); 152 clk_configs->fixed_div2.mult = 1;
154 init.ops = &clk_fixed_factor_ops; 153 clk_configs->fixed_div2.div = 2;
155 init.flags = CLK_SET_RATE_PARENT; 154 clk = meson8b_dwmac_register_clk(dwmac, "fixed_div2", &parent_name, 1,
156 clk_div_parents[0] = __clk_get_name(dwmac->m250_div_clk); 155 &clk_fixed_factor_ops,
157 init.parent_names = clk_div_parents; 156 &clk_configs->fixed_div2.hw);
158 init.num_parents = ARRAY_SIZE(clk_div_parents); 157 if (WARN_ON(IS_ERR(clk)))
159 158 return PTR_ERR(clk);
160 dwmac->fixed_div2.mult = 1; 159
161 dwmac->fixed_div2.div = 2; 160 parent_name = __clk_get_name(clk);
162 dwmac->fixed_div2.hw.init = &init; 161 clk_configs->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0;
163 162 clk_configs->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN;
164 dwmac->fixed_div2_clk = devm_clk_register(dev, &dwmac->fixed_div2.hw); 163 clk = meson8b_dwmac_register_clk(dwmac, "rgmii_tx_en", &parent_name, 1,
165 if (WARN_ON(IS_ERR(dwmac->fixed_div2_clk))) 164 &clk_gate_ops,
166 return PTR_ERR(dwmac->fixed_div2_clk); 165 &clk_configs->rgmii_tx_en.hw);
167 166 if (WARN_ON(IS_ERR(clk)))
168 /* create the rgmii_tx_en */ 167 return PTR_ERR(clk);
169 init.name = devm_kasprintf(dev, GFP_KERNEL, "%s#rgmii_tx_en", 168
170 dev_name(dev)); 169 dwmac->rgmii_tx_clk = clk;
171 init.ops = &clk_gate_ops;
172 init.flags = CLK_SET_RATE_PARENT;
173 clk_div_parents[0] = __clk_get_name(dwmac->fixed_div2_clk);
174 init.parent_names = clk_div_parents;
175 init.num_parents = ARRAY_SIZE(clk_div_parents);
176
177 dwmac->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0;
178 dwmac->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN;
179 dwmac->rgmii_tx_en.hw.init = &init;
180
181 dwmac->rgmii_tx_en_clk = devm_clk_register(dev,
182 &dwmac->rgmii_tx_en.hw);
183 if (WARN_ON(IS_ERR(dwmac->rgmii_tx_en_clk)))
184 return PTR_ERR(dwmac->rgmii_tx_en_clk);
185 170
186 return 0; 171 return 0;
187} 172}
@@ -219,19 +204,23 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
219 * a register) based on the line-speed (125MHz for Gbit speeds, 204 * a register) based on the line-speed (125MHz for Gbit speeds,
220 * 25MHz for 100Mbit/s and 2.5MHz for 10Mbit/s). 205 * 25MHz for 100Mbit/s and 2.5MHz for 10Mbit/s).
221 */ 206 */
222 ret = clk_set_rate(dwmac->rgmii_tx_en_clk, 125 * 1000 * 1000); 207 ret = clk_set_rate(dwmac->rgmii_tx_clk, 125 * 1000 * 1000);
223 if (ret) { 208 if (ret) {
224 dev_err(&dwmac->pdev->dev, 209 dev_err(dwmac->dev,
225 "failed to set RGMII TX clock\n"); 210 "failed to set RGMII TX clock\n");
226 return ret; 211 return ret;
227 } 212 }
228 213
229 ret = clk_prepare_enable(dwmac->rgmii_tx_en_clk); 214 ret = clk_prepare_enable(dwmac->rgmii_tx_clk);
230 if (ret) { 215 if (ret) {
231 dev_err(&dwmac->pdev->dev, 216 dev_err(dwmac->dev,
232 "failed to enable the RGMII TX clock\n"); 217 "failed to enable the RGMII TX clock\n");
233 return ret; 218 return ret;
234 } 219 }
220
221 devm_add_action_or_reset(dwmac->dev,
222 (void(*)(void *))clk_disable_unprepare,
223 dwmac->rgmii_tx_clk);
235 break; 224 break;
236 225
237 case PHY_INTERFACE_MODE_RMII: 226 case PHY_INTERFACE_MODE_RMII:
@@ -251,7 +240,7 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
251 break; 240 break;
252 241
253 default: 242 default:
254 dev_err(&dwmac->pdev->dev, "unsupported phy-mode %s\n", 243 dev_err(dwmac->dev, "unsupported phy-mode %s\n",
255 phy_modes(dwmac->phy_mode)); 244 phy_modes(dwmac->phy_mode));
256 return -EINVAL; 245 return -EINVAL;
257 } 246 }
@@ -292,7 +281,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
292 goto err_remove_config_dt; 281 goto err_remove_config_dt;
293 } 282 }
294 283
295 dwmac->pdev = pdev; 284 dwmac->dev = &pdev->dev;
296 dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node); 285 dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node);
297 if (dwmac->phy_mode < 0) { 286 if (dwmac->phy_mode < 0) {
298 dev_err(&pdev->dev, "missing phy-mode property\n"); 287 dev_err(&pdev->dev, "missing phy-mode property\n");
@@ -317,29 +306,16 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
317 306
318 ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); 307 ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
319 if (ret) 308 if (ret)
320 goto err_clk_disable; 309 goto err_remove_config_dt;
321 310
322 return 0; 311 return 0;
323 312
324err_clk_disable:
325 if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
326 clk_disable_unprepare(dwmac->rgmii_tx_en_clk);
327err_remove_config_dt: 313err_remove_config_dt:
328 stmmac_remove_config_dt(pdev, plat_dat); 314 stmmac_remove_config_dt(pdev, plat_dat);
329 315
330 return ret; 316 return ret;
331} 317}
332 318
333static int meson8b_dwmac_remove(struct platform_device *pdev)
334{
335 struct meson8b_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
336
337 if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
338 clk_disable_unprepare(dwmac->rgmii_tx_en_clk);
339
340 return stmmac_pltfr_remove(pdev);
341}
342
343static const struct of_device_id meson8b_dwmac_match[] = { 319static const struct of_device_id meson8b_dwmac_match[] = {
344 { .compatible = "amlogic,meson8b-dwmac" }, 320 { .compatible = "amlogic,meson8b-dwmac" },
345 { .compatible = "amlogic,meson-gxbb-dwmac" }, 321 { .compatible = "amlogic,meson-gxbb-dwmac" },
@@ -349,7 +325,7 @@ MODULE_DEVICE_TABLE(of, meson8b_dwmac_match);
349 325
350static struct platform_driver meson8b_dwmac_driver = { 326static struct platform_driver meson8b_dwmac_driver = {
351 .probe = meson8b_dwmac_probe, 327 .probe = meson8b_dwmac_probe,
352 .remove = meson8b_dwmac_remove, 328 .remove = stmmac_pltfr_remove,
353 .driver = { 329 .driver = {
354 .name = "meson8b-dwmac", 330 .name = "meson8b-dwmac",
355 .pm = &stmmac_pltfr_pm_ops, 331 .pm = &stmmac_pltfr_pm_ops,
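The dwmac-meson8b hunks above drop the open-coded clk_register boilerplate and the driver's custom remove path: the RGMII TX clock is now disabled through a devm action, so unbinding the driver tears it down automatically and .remove can fall back to stmmac_pltfr_remove. A minimal sketch of that pattern, with hypothetical function names (the devm/clk calls are the real APIs; the wrapper avoids the function-pointer cast the patch uses):

    #include <linux/clk.h>
    #include <linux/device.h>

    static void example_clk_disable(void *data)
    {
    	clk_disable_unprepare(data);	/* runs automatically on unbind */
    }

    static int example_enable_clk(struct device *dev, struct clk *clk)
    {
    	int ret;

    	ret = clk_prepare_enable(clk);
    	if (ret)
    		return ret;

    	/*
    	 * _or_reset: if registering the action fails, the action runs
    	 * immediately, so the clock is never left enabled on error.
    	 */
    	return devm_add_action_or_reset(dev, example_clk_disable, clk);
    }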
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 63795ecafc8d..46b9ae20ff6c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -120,7 +120,7 @@ static void dwmac4_tx_queue_priority(struct mac_device_info *hw,
120 writel(value, ioaddr + base_register); 120 writel(value, ioaddr + base_register);
121} 121}
122 122
123static void dwmac4_tx_queue_routing(struct mac_device_info *hw, 123static void dwmac4_rx_queue_routing(struct mac_device_info *hw,
124 u8 packet, u32 queue) 124 u8 packet, u32 queue)
125{ 125{
126 void __iomem *ioaddr = hw->pcsr; 126 void __iomem *ioaddr = hw->pcsr;
@@ -713,7 +713,7 @@ static const struct stmmac_ops dwmac4_ops = {
713 .rx_queue_enable = dwmac4_rx_queue_enable, 713 .rx_queue_enable = dwmac4_rx_queue_enable,
714 .rx_queue_prio = dwmac4_rx_queue_priority, 714 .rx_queue_prio = dwmac4_rx_queue_priority,
715 .tx_queue_prio = dwmac4_tx_queue_priority, 715 .tx_queue_prio = dwmac4_tx_queue_priority,
716 .rx_queue_routing = dwmac4_tx_queue_routing, 716 .rx_queue_routing = dwmac4_rx_queue_routing,
717 .prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms, 717 .prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
718 .prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms, 718 .prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
719 .set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight, 719 .set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
@@ -744,7 +744,7 @@ static const struct stmmac_ops dwmac410_ops = {
744 .rx_queue_enable = dwmac4_rx_queue_enable, 744 .rx_queue_enable = dwmac4_rx_queue_enable,
745 .rx_queue_prio = dwmac4_rx_queue_priority, 745 .rx_queue_prio = dwmac4_rx_queue_priority,
746 .tx_queue_prio = dwmac4_tx_queue_priority, 746 .tx_queue_prio = dwmac4_tx_queue_priority,
747 .rx_queue_routing = dwmac4_tx_queue_routing, 747 .rx_queue_routing = dwmac4_rx_queue_routing,
748 .prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms, 748 .prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
749 .prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms, 749 .prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
750 .set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight, 750 .set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index a916e13624eb..75161e1b7e55 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -58,6 +58,7 @@ struct stmmac_tx_queue {
58 unsigned int dirty_tx; 58 unsigned int dirty_tx;
59 dma_addr_t dma_tx_phy; 59 dma_addr_t dma_tx_phy;
60 u32 tx_tail_addr; 60 u32 tx_tail_addr;
61 u32 mss;
61}; 62};
62 63
63struct stmmac_rx_queue { 64struct stmmac_rx_queue {
@@ -138,7 +139,6 @@ struct stmmac_priv {
138 spinlock_t ptp_lock; 139 spinlock_t ptp_lock;
139 void __iomem *mmcaddr; 140 void __iomem *mmcaddr;
140 void __iomem *ptpaddr; 141 void __iomem *ptpaddr;
141 u32 mss;
142 142
143#ifdef CONFIG_DEBUG_FS 143#ifdef CONFIG_DEBUG_FS
144 struct dentry *dbgfs_dir; 144 struct dentry *dbgfs_dir;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 7ad841434ec8..c8d86d77e03d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1355,6 +1355,7 @@ static int init_dma_tx_desc_rings(struct net_device *dev)
1355 1355
1356 tx_q->dirty_tx = 0; 1356 tx_q->dirty_tx = 0;
1357 tx_q->cur_tx = 0; 1357 tx_q->cur_tx = 0;
1358 tx_q->mss = 0;
1358 1359
1359 netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue)); 1360 netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
1360 } 1361 }
@@ -1946,6 +1947,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
1946 (i == DMA_TX_SIZE - 1)); 1947 (i == DMA_TX_SIZE - 1));
1947 tx_q->dirty_tx = 0; 1948 tx_q->dirty_tx = 0;
1948 tx_q->cur_tx = 0; 1949 tx_q->cur_tx = 0;
1950 tx_q->mss = 0;
1949 netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, chan)); 1951 netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, chan));
1950 stmmac_start_tx_dma(priv, chan); 1952 stmmac_start_tx_dma(priv, chan);
1951 1953
@@ -2430,7 +2432,7 @@ static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv)
2430 continue; 2432 continue;
2431 2433
2432 packet = priv->plat->rx_queues_cfg[queue].pkt_route; 2434 packet = priv->plat->rx_queues_cfg[queue].pkt_route;
2433 priv->hw->mac->rx_queue_prio(priv->hw, packet, queue); 2435 priv->hw->mac->rx_queue_routing(priv->hw, packet, queue);
2434 } 2436 }
2435} 2437}
2436 2438
@@ -2632,7 +2634,6 @@ static int stmmac_open(struct net_device *dev)
2632 2634
2633 priv->dma_buf_sz = STMMAC_ALIGN(buf_sz); 2635 priv->dma_buf_sz = STMMAC_ALIGN(buf_sz);
2634 priv->rx_copybreak = STMMAC_RX_COPYBREAK; 2636 priv->rx_copybreak = STMMAC_RX_COPYBREAK;
2635 priv->mss = 0;
2636 2637
2637 ret = alloc_dma_desc_resources(priv); 2638 ret = alloc_dma_desc_resources(priv);
2638 if (ret < 0) { 2639 if (ret < 0) {
@@ -2793,6 +2794,7 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
2793 2794
2794 while (tmp_len > 0) { 2795 while (tmp_len > 0) {
2795 tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); 2796 tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
2797 WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
2796 desc = tx_q->dma_tx + tx_q->cur_tx; 2798 desc = tx_q->dma_tx + tx_q->cur_tx;
2797 2799
2798 desc->des0 = cpu_to_le32(des + (total_len - tmp_len)); 2800 desc->des0 = cpu_to_le32(des + (total_len - tmp_len));
@@ -2872,11 +2874,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
2872 mss = skb_shinfo(skb)->gso_size; 2874 mss = skb_shinfo(skb)->gso_size;
2873 2875
2874 /* set new MSS value if needed */ 2876 /* set new MSS value if needed */
2875 if (mss != priv->mss) { 2877 if (mss != tx_q->mss) {
2876 mss_desc = tx_q->dma_tx + tx_q->cur_tx; 2878 mss_desc = tx_q->dma_tx + tx_q->cur_tx;
2877 priv->hw->desc->set_mss(mss_desc, mss); 2879 priv->hw->desc->set_mss(mss_desc, mss);
2878 priv->mss = mss; 2880 tx_q->mss = mss;
2879 tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); 2881 tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
2882 WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
2880 } 2883 }
2881 2884
2882 if (netif_msg_tx_queued(priv)) { 2885 if (netif_msg_tx_queued(priv)) {
@@ -2887,6 +2890,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
2887 } 2890 }
2888 2891
2889 first_entry = tx_q->cur_tx; 2892 first_entry = tx_q->cur_tx;
2893 WARN_ON(tx_q->tx_skbuff[first_entry]);
2890 2894
2891 desc = tx_q->dma_tx + first_entry; 2895 desc = tx_q->dma_tx + first_entry;
2892 first = desc; 2896 first = desc;
@@ -2926,7 +2930,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
2926 2930
2927 tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des; 2931 tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
2928 tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag); 2932 tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag);
2929 tx_q->tx_skbuff[tx_q->cur_tx] = NULL;
2930 tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true; 2933 tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true;
2931 } 2934 }
2932 2935
@@ -3062,6 +3065,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
3062 3065
3063 entry = tx_q->cur_tx; 3066 entry = tx_q->cur_tx;
3064 first_entry = entry; 3067 first_entry = entry;
3068 WARN_ON(tx_q->tx_skbuff[first_entry]);
3065 3069
3066 csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL); 3070 csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
3067 3071
@@ -3090,6 +3094,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
3090 bool last_segment = (i == (nfrags - 1)); 3094 bool last_segment = (i == (nfrags - 1));
3091 3095
3092 entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE); 3096 entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
3097 WARN_ON(tx_q->tx_skbuff[entry]);
3093 3098
3094 if (likely(priv->extend_desc)) 3099 if (likely(priv->extend_desc))
3095 desc = (struct dma_desc *)(tx_q->dma_etx + entry); 3100 desc = (struct dma_desc *)(tx_q->dma_etx + entry);
@@ -3101,8 +3106,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
3101 if (dma_mapping_error(priv->device, des)) 3106 if (dma_mapping_error(priv->device, des))
3102 goto dma_map_err; /* should reuse desc w/o issues */ 3107 goto dma_map_err; /* should reuse desc w/o issues */
3103 3108
3104 tx_q->tx_skbuff[entry] = NULL;
3105
3106 tx_q->tx_skbuff_dma[entry].buf = des; 3109 tx_q->tx_skbuff_dma[entry].buf = des;
3107 if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) 3110 if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
3108 desc->des0 = cpu_to_le32(des); 3111 desc->des0 = cpu_to_le32(des);
@@ -4436,6 +4439,7 @@ static void stmmac_reset_queues_param(struct stmmac_priv *priv)
4436 4439
4437 tx_q->cur_tx = 0; 4440 tx_q->cur_tx = 0;
4438 tx_q->dirty_tx = 0; 4441 tx_q->dirty_tx = 0;
4442 tx_q->mss = 0;
4439 } 4443 }
4440} 4444}
4441 4445
@@ -4481,11 +4485,6 @@ int stmmac_resume(struct device *dev)
4481 4485
4482 stmmac_reset_queues_param(priv); 4486 stmmac_reset_queues_param(priv);
4483 4487
4484 /* reset private mss value to force mss context settings at
4485 * next tso xmit (only used for gmac4).
4486 */
4487 priv->mss = 0;
4488
4489 stmmac_clear_descriptors(priv); 4488 stmmac_clear_descriptors(priv);
4490 4489
4491 stmmac_hw_setup(ndev, false); 4490 stmmac_hw_setup(ndev, false);
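The stmmac hunks above move the cached TSO MSS from the driver-private struct to each TX queue and reset it wherever a ring is reinitialized, so a queue whose descriptors were wiped always emits a fresh MSS context descriptor. A trimmed sketch of the bookkeeping, with illustrative names (set_mss_desc() stands in for the real descriptor callback):

    #include <linux/types.h>

    struct example_tx_queue {
    	unsigned int cur_tx;
    	unsigned int dirty_tx;
    	u32 mss;			/* MSS programmed into this ring */
    };

    void set_mss_desc(struct example_tx_queue *tx_q, u32 mss);	/* stand-in */

    static void example_reset_queue(struct example_tx_queue *tx_q)
    {
    	tx_q->cur_tx = 0;
    	tx_q->dirty_tx = 0;
    	tx_q->mss = 0;		/* forces a new MSS context descriptor */
    }

    static void example_tso_prep(struct example_tx_queue *tx_q, u32 mss)
    {
    	/* only spend a context descriptor when the MSS changes */
    	if (mss != tx_q->mss) {
    		set_mss_desc(tx_q, mss);
    		tx_q->mss = mss;
    	}
    }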
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 05f122b8424a..ebd3e5ffa73c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -135,13 +135,14 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
135 * stmmac_mtl_setup - parse DT parameters for multiple queues configuration 135 * stmmac_mtl_setup - parse DT parameters for multiple queues configuration
136 * @pdev: platform device 136 * @pdev: platform device
137 */ 137 */
138static void stmmac_mtl_setup(struct platform_device *pdev, 138static int stmmac_mtl_setup(struct platform_device *pdev,
139 struct plat_stmmacenet_data *plat) 139 struct plat_stmmacenet_data *plat)
140{ 140{
141 struct device_node *q_node; 141 struct device_node *q_node;
142 struct device_node *rx_node; 142 struct device_node *rx_node;
143 struct device_node *tx_node; 143 struct device_node *tx_node;
144 u8 queue = 0; 144 u8 queue = 0;
145 int ret = 0;
145 146
146 /* For backwards-compatibility with device trees that don't have any 147 /* For backwards-compatibility with device trees that don't have any
147 * snps,mtl-rx-config or snps,mtl-tx-config properties, we fall back 148 * snps,mtl-rx-config or snps,mtl-tx-config properties, we fall back
@@ -159,12 +160,12 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
159 160
160 rx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-rx-config", 0); 161 rx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-rx-config", 0);
161 if (!rx_node) 162 if (!rx_node)
162 return; 163 return ret;
163 164
164 tx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-tx-config", 0); 165 tx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-tx-config", 0);
165 if (!tx_node) { 166 if (!tx_node) {
166 of_node_put(rx_node); 167 of_node_put(rx_node);
167 return; 168 return ret;
168 } 169 }
169 170
170 /* Processing RX queues common config */ 171 /* Processing RX queues common config */
@@ -220,6 +221,11 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
220 221
221 queue++; 222 queue++;
222 } 223 }
224 if (queue != plat->rx_queues_to_use) {
225 ret = -EINVAL;
226 dev_err(&pdev->dev, "Not all RX queues were configured\n");
227 goto out;
228 }
223 229
224 /* Processing TX queues common config */ 230 /* Processing TX queues common config */
225 if (of_property_read_u32(tx_node, "snps,tx-queues-to-use", 231 if (of_property_read_u32(tx_node, "snps,tx-queues-to-use",
@@ -281,10 +287,18 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
281 287
282 queue++; 288 queue++;
283 } 289 }
290 if (queue != plat->tx_queues_to_use) {
291 ret = -EINVAL;
292 dev_err(&pdev->dev, "Not all TX queues were configured\n");
293 goto out;
294 }
284 295
296out:
285 of_node_put(rx_node); 297 of_node_put(rx_node);
286 of_node_put(tx_node); 298 of_node_put(tx_node);
287 of_node_put(q_node); 299 of_node_put(q_node);
300
301 return ret;
288} 302}
289 303
290/** 304/**
@@ -376,6 +390,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
376 struct device_node *np = pdev->dev.of_node; 390 struct device_node *np = pdev->dev.of_node;
377 struct plat_stmmacenet_data *plat; 391 struct plat_stmmacenet_data *plat;
378 struct stmmac_dma_cfg *dma_cfg; 392 struct stmmac_dma_cfg *dma_cfg;
393 int rc;
379 394
380 plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL); 395 plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
381 if (!plat) 396 if (!plat)
@@ -402,8 +417,9 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
402 dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n"); 417 dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n");
403 418
404 /* To Configure PHY by using all device-tree supported properties */ 419 /* To Configure PHY by using all device-tree supported properties */
405 if (stmmac_dt_phy(plat, np, &pdev->dev)) 420 rc = stmmac_dt_phy(plat, np, &pdev->dev);
406 return ERR_PTR(-ENODEV); 421 if (rc)
422 return ERR_PTR(rc);
407 423
408 of_property_read_u32(np, "tx-fifo-depth", &plat->tx_fifo_size); 424 of_property_read_u32(np, "tx-fifo-depth", &plat->tx_fifo_size);
409 425
@@ -499,7 +515,11 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
499 515
500 plat->axi = stmmac_axi_setup(pdev); 516 plat->axi = stmmac_axi_setup(pdev);
501 517
502 stmmac_mtl_setup(pdev, plat); 518 rc = stmmac_mtl_setup(pdev, plat);
519 if (rc) {
520 stmmac_remove_config_dt(pdev, plat);
521 return ERR_PTR(rc);
522 }
503 523
504 /* clock setup */ 524 /* clock setup */
505 plat->stmmac_clk = devm_clk_get(&pdev->dev, 525 plat->stmmac_clk = devm_clk_get(&pdev->dev,
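stmmac_mtl_setup() now returns an error when the device tree declares more queues in snps,{rx,tx}-queues-to-use than it provides child nodes for, and the probe path unwinds instead of running with a partial configuration. A sketch of the count-and-validate idea, using the real of_ iteration helper inside a hypothetical function:

    #include <linux/errno.h>
    #include <linux/of.h>

    static int example_count_queues(struct device_node *cfg, u32 expected)
    {
    	struct device_node *q_node;
    	u32 queue = 0;

    	for_each_child_of_node(cfg, q_node)	/* handles of_node_put */
    		queue++;

    	/* fail the probe rather than silently configure fewer queues */
    	return queue == expected ? 0 : -EINVAL;
    }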
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index c1f008fe4e1d..1b5dc200b573 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -35,6 +35,7 @@ void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
35} 35}
36EXPORT_SYMBOL_GPL(ipvlan_count_rx); 36EXPORT_SYMBOL_GPL(ipvlan_count_rx);
37 37
38#if IS_ENABLED(CONFIG_IPV6)
38static u8 ipvlan_get_v6_hash(const void *iaddr) 39static u8 ipvlan_get_v6_hash(const void *iaddr)
39{ 40{
40 const struct in6_addr *ip6_addr = iaddr; 41 const struct in6_addr *ip6_addr = iaddr;
@@ -42,6 +43,12 @@ static u8 ipvlan_get_v6_hash(const void *iaddr)
42 return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) & 43 return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
43 IPVLAN_HASH_MASK; 44 IPVLAN_HASH_MASK;
44} 45}
46#else
47static u8 ipvlan_get_v6_hash(const void *iaddr)
48{
49 return 0;
50}
51#endif
45 52
46static u8 ipvlan_get_v4_hash(const void *iaddr) 53static u8 ipvlan_get_v4_hash(const void *iaddr)
47{ 54{
@@ -51,6 +58,23 @@ static u8 ipvlan_get_v4_hash(const void *iaddr)
51 IPVLAN_HASH_MASK; 58 IPVLAN_HASH_MASK;
52} 59}
53 60
61static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr)
62{
63 if (!is_v6 && addr->atype == IPVL_IPV4) {
64 struct in_addr *i4addr = (struct in_addr *)iaddr;
65
66 return addr->ip4addr.s_addr == i4addr->s_addr;
67#if IS_ENABLED(CONFIG_IPV6)
68 } else if (is_v6 && addr->atype == IPVL_IPV6) {
69 struct in6_addr *i6addr = (struct in6_addr *)iaddr;
70
71 return ipv6_addr_equal(&addr->ip6addr, i6addr);
72#endif
73 }
74
75 return false;
76}
77
54static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, 78static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
55 const void *iaddr, bool is_v6) 79 const void *iaddr, bool is_v6)
56{ 80{
@@ -59,15 +83,9 @@ static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
59 83
60 hash = is_v6 ? ipvlan_get_v6_hash(iaddr) : 84 hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
61 ipvlan_get_v4_hash(iaddr); 85 ipvlan_get_v4_hash(iaddr);
62 hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) { 86 hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode)
63 if (is_v6 && addr->atype == IPVL_IPV6 && 87 if (addr_equal(is_v6, addr, iaddr))
64 ipv6_addr_equal(&addr->ip6addr, iaddr))
65 return addr;
66 else if (!is_v6 && addr->atype == IPVL_IPV4 &&
67 addr->ip4addr.s_addr ==
68 ((struct in_addr *)iaddr)->s_addr)
69 return addr; 88 return addr;
70 }
71 return NULL; 89 return NULL;
72} 90}
73 91
@@ -93,13 +111,9 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
93{ 111{
94 struct ipvl_addr *addr; 112 struct ipvl_addr *addr;
95 113
96 list_for_each_entry(addr, &ipvlan->addrs, anode) { 114 list_for_each_entry(addr, &ipvlan->addrs, anode)
97 if ((is_v6 && addr->atype == IPVL_IPV6 && 115 if (addr_equal(is_v6, addr, iaddr))
98 ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
99 (!is_v6 && addr->atype == IPVL_IPV4 &&
100 addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
101 return addr; 116 return addr;
102 }
103 return NULL; 117 return NULL;
104} 118}
105 119
@@ -150,6 +164,7 @@ static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int
150 lyr3h = ip4h; 164 lyr3h = ip4h;
151 break; 165 break;
152 } 166 }
167#if IS_ENABLED(CONFIG_IPV6)
153 case htons(ETH_P_IPV6): { 168 case htons(ETH_P_IPV6): {
154 struct ipv6hdr *ip6h; 169 struct ipv6hdr *ip6h;
155 170
@@ -188,6 +203,7 @@ static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int
188 } 203 }
189 break; 204 break;
190 } 205 }
206#endif
191 default: 207 default:
192 return NULL; 208 return NULL;
193 } 209 }
@@ -337,14 +353,18 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
337{ 353{
338 struct ipvl_addr *addr = NULL; 354 struct ipvl_addr *addr = NULL;
339 355
340 if (addr_type == IPVL_IPV6) { 356 switch (addr_type) {
357#if IS_ENABLED(CONFIG_IPV6)
358 case IPVL_IPV6: {
341 struct ipv6hdr *ip6h; 359 struct ipv6hdr *ip6h;
342 struct in6_addr *i6addr; 360 struct in6_addr *i6addr;
343 361
344 ip6h = (struct ipv6hdr *)lyr3h; 362 ip6h = (struct ipv6hdr *)lyr3h;
345 i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr; 363 i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
346 addr = ipvlan_ht_addr_lookup(port, i6addr, true); 364 addr = ipvlan_ht_addr_lookup(port, i6addr, true);
347 } else if (addr_type == IPVL_ICMPV6) { 365 break;
366 }
367 case IPVL_ICMPV6: {
348 struct nd_msg *ndmh; 368 struct nd_msg *ndmh;
349 struct in6_addr *i6addr; 369 struct in6_addr *i6addr;
350 370
@@ -356,14 +376,19 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
356 i6addr = &ndmh->target; 376 i6addr = &ndmh->target;
357 addr = ipvlan_ht_addr_lookup(port, i6addr, true); 377 addr = ipvlan_ht_addr_lookup(port, i6addr, true);
358 } 378 }
359 } else if (addr_type == IPVL_IPV4) { 379 break;
380 }
381#endif
382 case IPVL_IPV4: {
360 struct iphdr *ip4h; 383 struct iphdr *ip4h;
361 __be32 *i4addr; 384 __be32 *i4addr;
362 385
363 ip4h = (struct iphdr *)lyr3h; 386 ip4h = (struct iphdr *)lyr3h;
364 i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr; 387 i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
365 addr = ipvlan_ht_addr_lookup(port, i4addr, false); 388 addr = ipvlan_ht_addr_lookup(port, i4addr, false);
366 } else if (addr_type == IPVL_ARP) { 389 break;
390 }
391 case IPVL_ARP: {
367 struct arphdr *arph; 392 struct arphdr *arph;
368 unsigned char *arp_ptr; 393 unsigned char *arp_ptr;
369 __be32 dip; 394 __be32 dip;
@@ -377,6 +402,8 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
377 402
378 memcpy(&dip, arp_ptr, 4); 403 memcpy(&dip, arp_ptr, 4);
379 addr = ipvlan_ht_addr_lookup(port, &dip, false); 404 addr = ipvlan_ht_addr_lookup(port, &dip, false);
405 break;
406 }
380 } 407 }
381 408
382 return addr; 409 return addr;
@@ -420,6 +447,7 @@ out:
420 return ret; 447 return ret;
421} 448}
422 449
450#if IS_ENABLED(CONFIG_IPV6)
423static int ipvlan_process_v6_outbound(struct sk_buff *skb) 451static int ipvlan_process_v6_outbound(struct sk_buff *skb)
424{ 452{
425 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 453 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -456,6 +484,12 @@ err:
456out: 484out:
457 return ret; 485 return ret;
458} 486}
487#else
488static int ipvlan_process_v6_outbound(struct sk_buff *skb)
489{
490 return NET_XMIT_DROP;
491}
492#endif
459 493
460static int ipvlan_process_outbound(struct sk_buff *skb) 494static int ipvlan_process_outbound(struct sk_buff *skb)
461{ 495{
@@ -759,6 +793,7 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
759 goto out; 793 goto out;
760 break; 794 break;
761 } 795 }
796#if IS_ENABLED(CONFIG_IPV6)
762 case AF_INET6: 797 case AF_INET6:
763 { 798 {
764 struct dst_entry *dst; 799 struct dst_entry *dst;
@@ -778,6 +813,7 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
778 skb_dst_set(skb, dst); 813 skb_dst_set(skb, dst);
779 break; 814 break;
780 } 815 }
816#endif
781 default: 817 default:
782 break; 818 break;
783 } 819 }
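Two patterns recur in the ipvlan_core.c hunks above: the duplicated v4/v6 comparisons collapse into a single addr_equal() helper, and IPv6-only code is bracketed so a !CONFIG_IPV6 build drops it while callers stay free of #ifdefs. A sketch of the stub half of that, with illustrative names (IS_ENABLED() and the NET_XMIT_* codes are real):

    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    #if IS_ENABLED(CONFIG_IPV6)
    static int example_process_v6(struct sk_buff *skb)
    {
    	/* real IPv6 path: route lookup, dst attach, transmit */
    	return NET_XMIT_SUCCESS;
    }
    #else
    static int example_process_v6(struct sk_buff *skb)
    {
    	return NET_XMIT_DROP;	/* IPv6 not compiled in */
    }
    #endif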
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 2469df118fbf..67c91ceda979 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -22,12 +22,14 @@ static const struct nf_hook_ops ipvl_nfops[] = {
22 .hooknum = NF_INET_LOCAL_IN, 22 .hooknum = NF_INET_LOCAL_IN,
23 .priority = INT_MAX, 23 .priority = INT_MAX,
24 }, 24 },
25#if IS_ENABLED(CONFIG_IPV6)
25 { 26 {
26 .hook = ipvlan_nf_input, 27 .hook = ipvlan_nf_input,
27 .pf = NFPROTO_IPV6, 28 .pf = NFPROTO_IPV6,
28 .hooknum = NF_INET_LOCAL_IN, 29 .hooknum = NF_INET_LOCAL_IN,
29 .priority = INT_MAX, 30 .priority = INT_MAX,
30 }, 31 },
32#endif
31}; 33};
32 34
33static const struct l3mdev_ops ipvl_l3mdev_ops = { 35static const struct l3mdev_ops ipvl_l3mdev_ops = {
@@ -800,12 +802,14 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
800 return -ENOMEM; 802 return -ENOMEM;
801 803
802 addr->master = ipvlan; 804 addr->master = ipvlan;
803 if (is_v6) { 805 if (!is_v6) {
804 memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
805 addr->atype = IPVL_IPV6;
806 } else {
807 memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr)); 806 memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr));
808 addr->atype = IPVL_IPV4; 807 addr->atype = IPVL_IPV4;
808#if IS_ENABLED(CONFIG_IPV6)
809 } else {
810 memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
811 addr->atype = IPVL_IPV6;
812#endif
809 } 813 }
810 list_add_tail(&addr->anode, &ipvlan->addrs); 814 list_add_tail(&addr->anode, &ipvlan->addrs);
811 815
@@ -833,6 +837,20 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
833 return; 837 return;
834} 838}
835 839
840static bool ipvlan_is_valid_dev(const struct net_device *dev)
841{
842 struct ipvl_dev *ipvlan = netdev_priv(dev);
843
844 if (!netif_is_ipvlan(dev))
845 return false;
846
847 if (!ipvlan || !ipvlan->port)
848 return false;
849
850 return true;
851}
852
853#if IS_ENABLED(CONFIG_IPV6)
836static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) 854static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
837{ 855{
838 if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) { 856 if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
@@ -850,19 +868,6 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
850 return ipvlan_del_addr(ipvlan, ip6_addr, true); 868 return ipvlan_del_addr(ipvlan, ip6_addr, true);
851} 869}
852 870
853static bool ipvlan_is_valid_dev(const struct net_device *dev)
854{
855 struct ipvl_dev *ipvlan = netdev_priv(dev);
856
857 if (!netif_is_ipvlan(dev))
858 return false;
859
860 if (!ipvlan || !ipvlan->port)
861 return false;
862
863 return true;
864}
865
866static int ipvlan_addr6_event(struct notifier_block *unused, 871static int ipvlan_addr6_event(struct notifier_block *unused,
867 unsigned long event, void *ptr) 872 unsigned long event, void *ptr)
868{ 873{
@@ -913,6 +918,7 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
913 918
914 return NOTIFY_OK; 919 return NOTIFY_OK;
915} 920}
921#endif
916 922
917static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) 923static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
918{ 924{
@@ -993,6 +999,7 @@ static struct notifier_block ipvlan_notifier_block __read_mostly = {
993 .notifier_call = ipvlan_device_event, 999 .notifier_call = ipvlan_device_event,
994}; 1000};
995 1001
1002#if IS_ENABLED(CONFIG_IPV6)
996static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = { 1003static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
997 .notifier_call = ipvlan_addr6_event, 1004 .notifier_call = ipvlan_addr6_event,
998}; 1005};
@@ -1000,6 +1007,7 @@ static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
1000static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = { 1007static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = {
1001 .notifier_call = ipvlan_addr6_validator_event, 1008 .notifier_call = ipvlan_addr6_validator_event,
1002}; 1009};
1010#endif
1003 1011
1004static void ipvlan_ns_exit(struct net *net) 1012static void ipvlan_ns_exit(struct net *net)
1005{ 1013{
@@ -1024,9 +1032,11 @@ static int __init ipvlan_init_module(void)
1024 1032
1025 ipvlan_init_secret(); 1033 ipvlan_init_secret();
1026 register_netdevice_notifier(&ipvlan_notifier_block); 1034 register_netdevice_notifier(&ipvlan_notifier_block);
1035#if IS_ENABLED(CONFIG_IPV6)
1027 register_inet6addr_notifier(&ipvlan_addr6_notifier_block); 1036 register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
1028 register_inet6addr_validator_notifier( 1037 register_inet6addr_validator_notifier(
1029 &ipvlan_addr6_vtor_notifier_block); 1038 &ipvlan_addr6_vtor_notifier_block);
1039#endif
1030 register_inetaddr_notifier(&ipvlan_addr4_notifier_block); 1040 register_inetaddr_notifier(&ipvlan_addr4_notifier_block);
1031 register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block); 1041 register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block);
1032 1042
@@ -1045,9 +1055,11 @@ error:
1045 unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); 1055 unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
1046 unregister_inetaddr_validator_notifier( 1056 unregister_inetaddr_validator_notifier(
1047 &ipvlan_addr4_vtor_notifier_block); 1057 &ipvlan_addr4_vtor_notifier_block);
1058#if IS_ENABLED(CONFIG_IPV6)
1048 unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); 1059 unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
1049 unregister_inet6addr_validator_notifier( 1060 unregister_inet6addr_validator_notifier(
1050 &ipvlan_addr6_vtor_notifier_block); 1061 &ipvlan_addr6_vtor_notifier_block);
1062#endif
1051 unregister_netdevice_notifier(&ipvlan_notifier_block); 1063 unregister_netdevice_notifier(&ipvlan_notifier_block);
1052 return err; 1064 return err;
1053} 1065}
@@ -1060,9 +1072,11 @@ static void __exit ipvlan_cleanup_module(void)
1060 unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); 1072 unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
1061 unregister_inetaddr_validator_notifier( 1073 unregister_inetaddr_validator_notifier(
1062 &ipvlan_addr4_vtor_notifier_block); 1074 &ipvlan_addr4_vtor_notifier_block);
1075#if IS_ENABLED(CONFIG_IPV6)
1063 unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); 1076 unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
1064 unregister_inet6addr_validator_notifier( 1077 unregister_inet6addr_validator_notifier(
1065 &ipvlan_addr6_vtor_notifier_block); 1078 &ipvlan_addr6_vtor_notifier_block);
1079#endif
1066} 1080}
1067 1081
1068module_init(ipvlan_init_module); 1082module_init(ipvlan_init_module);
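ipvlan_main.c applies the same guard to the notifier plumbing: the inet6addr notifier blocks, their registration, and every unregister site are wrapped in IS_ENABLED(CONFIG_IPV6), so the symbols simply do not exist in IPv4-only builds. A condensed sketch of the init/exit wiring with illustrative names; note the exit path unregisters in reverse order of registration:

    #include <linux/init.h>
    #include <linux/netdevice.h>
    #include <linux/notifier.h>
    #include <net/addrconf.h>

    static int example_event(struct notifier_block *nb, unsigned long event,
    			 void *ptr)
    {
    	return NOTIFY_DONE;
    }

    static struct notifier_block example_nb = { .notifier_call = example_event };
    #if IS_ENABLED(CONFIG_IPV6)
    static struct notifier_block example_v6_nb = { .notifier_call = example_event };
    #endif

    static int __init example_init(void)
    {
    	register_netdevice_notifier(&example_nb);
    #if IS_ENABLED(CONFIG_IPV6)
    	register_inet6addr_notifier(&example_v6_nb);
    #endif
    	return 0;
    }

    static void __exit example_exit(void)
    {
    #if IS_ENABLED(CONFIG_IPV6)
    	unregister_inet6addr_notifier(&example_v6_nb);
    #endif
    	unregister_netdevice_notifier(&example_nb);
    }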
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 30612497643c..b97a907ea5aa 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -230,4 +230,5 @@ out:
230/* Registered in net/core/dev.c */ 230/* Registered in net/core/dev.c */
231struct pernet_operations __net_initdata loopback_net_ops = { 231struct pernet_operations __net_initdata loopback_net_ops = {
232 .init = loopback_net_init, 232 .init = loopback_net_init,
233 .async = true,
233}; 234};
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index ab58224f897f..b3935778b19f 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -75,6 +75,8 @@
75 75
76#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX 0x0 76#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX 0x0
77#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN 0x1f 77#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN 0x1f
78#define DP83867_IO_MUX_CFG_CLK_O_SEL_MASK (0x1f << 8)
79#define DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT 8
78 80
79/* CFG4 bits */ 81/* CFG4 bits */
80#define DP83867_CFG4_PORT_MIRROR_EN BIT(0) 82#define DP83867_CFG4_PORT_MIRROR_EN BIT(0)
@@ -92,6 +94,7 @@ struct dp83867_private {
92 int io_impedance; 94 int io_impedance;
93 int port_mirroring; 95 int port_mirroring;
94 bool rxctrl_strap_quirk; 96 bool rxctrl_strap_quirk;
97 int clk_output_sel;
95}; 98};
96 99
97static int dp83867_ack_interrupt(struct phy_device *phydev) 100static int dp83867_ack_interrupt(struct phy_device *phydev)
@@ -160,6 +163,14 @@ static int dp83867_of_init(struct phy_device *phydev)
160 dp83867->io_impedance = -EINVAL; 163 dp83867->io_impedance = -EINVAL;
161 164
162 /* Optional configuration */ 165 /* Optional configuration */
166 ret = of_property_read_u32(of_node, "ti,clk-output-sel",
167 &dp83867->clk_output_sel);
168 if (ret || dp83867->clk_output_sel > DP83867_CLK_O_SEL_REF_CLK)
169 /* Keep the default value if ti,clk-output-sel is not set
170 * or too high
171 */
172 dp83867->clk_output_sel = DP83867_CLK_O_SEL_REF_CLK;
173
163 if (of_property_read_bool(of_node, "ti,max-output-impedance")) 174 if (of_property_read_bool(of_node, "ti,max-output-impedance"))
164 dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX; 175 dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX;
165 else if (of_property_read_bool(of_node, "ti,min-output-impedance")) 176 else if (of_property_read_bool(of_node, "ti,min-output-impedance"))
@@ -295,6 +306,14 @@ static int dp83867_config_init(struct phy_device *phydev)
295 if (dp83867->port_mirroring != DP83867_PORT_MIRROING_KEEP) 306 if (dp83867->port_mirroring != DP83867_PORT_MIRROING_KEEP)
296 dp83867_config_port_mirroring(phydev); 307 dp83867_config_port_mirroring(phydev);
297 308
309 /* Clock output selection if muxing property is set */
310 if (dp83867->clk_output_sel != DP83867_CLK_O_SEL_REF_CLK) {
311 val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG);
312 val &= ~DP83867_IO_MUX_CFG_CLK_O_SEL_MASK;
313 val |= (dp83867->clk_output_sel << DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT);
314 phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG, val);
315 }
316
298 return 0; 317 return 0;
299} 318}
300 319
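The dp83867 hunks add an optional ti,clk-output-sel property, falling back to DP83867_CLK_O_SEL_REF_CLK when the property is absent or out of range, and program it with a read-modify-write of the 5-bit CLK_O_SEL field in IO_MUX_CFG. A sketch of that MMD update, reusing the field layout from the hunk but a hypothetical register constant (phy_read_mmd()/phy_write_mmd() are the real APIs; error handling on the read is omitted):

    #include <linux/phy.h>

    #define EX_IO_MUX_CFG		0x0170		/* hypothetical offset */
    #define EX_CLK_O_SEL_SHIFT	8
    #define EX_CLK_O_SEL_MASK	(0x1f << EX_CLK_O_SEL_SHIFT)

    static void example_set_clk_out(struct phy_device *phydev, int devad,
    				u32 sel)
    {
    	int val = phy_read_mmd(phydev, devad, EX_IO_MUX_CFG);

    	val &= ~EX_CLK_O_SEL_MASK;		/* clear the 5-bit field */
    	val |= sel << EX_CLK_O_SEL_SHIFT;	/* install the mux value */
    	phy_write_mmd(phydev, devad, EX_IO_MUX_CFG, val);
    }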
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index b13eed21c87d..d39ae77707ef 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1382,7 +1382,7 @@ int genphy_setup_forced(struct phy_device *phydev)
1382 ctl |= BMCR_FULLDPLX; 1382 ctl |= BMCR_FULLDPLX;
1383 1383
1384 return phy_modify(phydev, MII_BMCR, 1384 return phy_modify(phydev, MII_BMCR,
1385 BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN, ctl); 1385 ~(BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN), ctl);
1386} 1386}
1387EXPORT_SYMBOL(genphy_setup_forced); 1387EXPORT_SYMBOL(genphy_setup_forced);
1388 1388
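The genphy_setup_forced() fix hinges on phy_modify() semantics: it writes (old & ~mask) | set, so bits named in the mask are cleared before the new value is OR-ed in. Passing BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN directly therefore wiped exactly the bits the function meant to preserve; complementing the mask keeps them while ctl rewrites the rest. A minimal restatement with an illustrative wrapper:

    #include <linux/mii.h>
    #include <linux/phy.h>

    static int example_setup_forced(struct phy_device *phydev, u16 ctl)
    {
    	/* preserve LOOPBACK/ISOLATE/PDOWN, replace everything else */
    	return phy_modify(phydev, MII_BMCR,
    			  ~(BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN), ctl);
    }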
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 5aa59f41bf8c..bd89d1c559ce 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -714,7 +714,7 @@ err_put:
714} 714}
715 715
716static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr, 716static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr,
717 int *usockaddr_len, int peer) 717 int peer)
718{ 718{
719 int len = sizeof(struct sockaddr_pppox); 719 int len = sizeof(struct sockaddr_pppox);
720 struct sockaddr_pppox sp; 720 struct sockaddr_pppox sp;
@@ -726,9 +726,7 @@ static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr,
726 726
727 memcpy(uaddr, &sp, len); 727 memcpy(uaddr, &sp, len);
728 728
729 *usockaddr_len = len; 729 return len;
730
731 return 0;
732} 730}
733 731
734static int pppoe_ioctl(struct socket *sock, unsigned int cmd, 732static int pppoe_ioctl(struct socket *sock, unsigned int cmd,
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 6dde9a0cfe76..8249d46a7844 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -483,7 +483,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr,
483} 483}
484 484
485static int pptp_getname(struct socket *sock, struct sockaddr *uaddr, 485static int pptp_getname(struct socket *sock, struct sockaddr *uaddr,
486 int *usockaddr_len, int peer) 486 int peer)
487{ 487{
488 int len = sizeof(struct sockaddr_pppox); 488 int len = sizeof(struct sockaddr_pppox);
489 struct sockaddr_pppox sp; 489 struct sockaddr_pppox sp;
@@ -496,9 +496,7 @@ static int pptp_getname(struct socket *sock, struct sockaddr *uaddr,
496 496
497 memcpy(uaddr, &sp, len); 497 memcpy(uaddr, &sp, len);
498 498
499 *usockaddr_len = len; 499 return len;
500
501 return 0;
502} 500}
503 501
504static int pptp_release(struct socket *sock) 502static int pptp_release(struct socket *sock)
diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c
index ca5e375de27c..e0d6760f3219 100644
--- a/drivers/net/thunderbolt.c
+++ b/drivers/net/thunderbolt.c
@@ -166,6 +166,8 @@ struct tbnet_ring {
166 * @connected_work: Worker that finalizes the ThunderboltIP connection 166 * @connected_work: Worker that finalizes the ThunderboltIP connection
167 * setup and enables DMA paths for high speed data 167 * setup and enables DMA paths for high speed data
168 * transfers 168 * transfers
169 * @disconnect_work: Worker that handles tearing down the ThunderboltIP
170 * connection
169 * @rx_hdr: Copy of the currently processed Rx frame. Used when a 171 * @rx_hdr: Copy of the currently processed Rx frame. Used when a
170 * network packet consists of multiple Thunderbolt frames. 172 * network packet consists of multiple Thunderbolt frames.
171 * In host byte order. 173 * In host byte order.
@@ -190,6 +192,7 @@ struct tbnet {
190 int login_retries; 192 int login_retries;
191 struct delayed_work login_work; 193 struct delayed_work login_work;
192 struct work_struct connected_work; 194 struct work_struct connected_work;
195 struct work_struct disconnect_work;
193 struct thunderbolt_ip_frame_header rx_hdr; 196 struct thunderbolt_ip_frame_header rx_hdr;
194 struct tbnet_ring rx_ring; 197 struct tbnet_ring rx_ring;
195 atomic_t frame_id; 198 atomic_t frame_id;
@@ -445,7 +448,7 @@ static int tbnet_handle_packet(const void *buf, size_t size, void *data)
445 case TBIP_LOGOUT: 448 case TBIP_LOGOUT:
446 ret = tbnet_logout_response(net, route, sequence, command_id); 449 ret = tbnet_logout_response(net, route, sequence, command_id);
447 if (!ret) 450 if (!ret)
448 tbnet_tear_down(net, false); 451 queue_work(system_long_wq, &net->disconnect_work);
449 break; 452 break;
450 453
451 default: 454 default:
@@ -659,6 +662,13 @@ static void tbnet_login_work(struct work_struct *work)
659 } 662 }
660} 663}
661 664
665static void tbnet_disconnect_work(struct work_struct *work)
666{
667 struct tbnet *net = container_of(work, typeof(*net), disconnect_work);
668
669 tbnet_tear_down(net, false);
670}
671
662static bool tbnet_check_frame(struct tbnet *net, const struct tbnet_frame *tf, 672static bool tbnet_check_frame(struct tbnet *net, const struct tbnet_frame *tf,
663 const struct thunderbolt_ip_frame_header *hdr) 673 const struct thunderbolt_ip_frame_header *hdr)
664{ 674{
@@ -881,6 +891,7 @@ static int tbnet_stop(struct net_device *dev)
881 891
882 napi_disable(&net->napi); 892 napi_disable(&net->napi);
883 893
894 cancel_work_sync(&net->disconnect_work);
884 tbnet_tear_down(net, true); 895 tbnet_tear_down(net, true);
885 896
886 tb_ring_free(net->rx_ring.ring); 897 tb_ring_free(net->rx_ring.ring);
@@ -1195,6 +1206,7 @@ static int tbnet_probe(struct tb_service *svc, const struct tb_service_id *id)
1195 net = netdev_priv(dev); 1206 net = netdev_priv(dev);
1196 INIT_DELAYED_WORK(&net->login_work, tbnet_login_work); 1207 INIT_DELAYED_WORK(&net->login_work, tbnet_login_work);
1197 INIT_WORK(&net->connected_work, tbnet_connected_work); 1208 INIT_WORK(&net->connected_work, tbnet_connected_work);
1209 INIT_WORK(&net->disconnect_work, tbnet_disconnect_work);
1198 mutex_init(&net->connection_lock); 1210 mutex_init(&net->connection_lock);
1199 atomic_set(&net->command_id, 0); 1211 atomic_set(&net->command_id, 0);
1200 atomic_set(&net->frame_id, 0); 1212 atomic_set(&net->frame_id, 0);
@@ -1270,10 +1282,7 @@ static int __maybe_unused tbnet_suspend(struct device *dev)
1270 stop_login(net); 1282 stop_login(net);
1271 if (netif_running(net->dev)) { 1283 if (netif_running(net->dev)) {
1272 netif_device_detach(net->dev); 1284 netif_device_detach(net->dev);
1273 tb_ring_stop(net->rx_ring.ring); 1285 tbnet_tear_down(net, true);
1274 tb_ring_stop(net->tx_ring.ring);
1275 tbnet_free_buffers(&net->rx_ring);
1276 tbnet_free_buffers(&net->tx_ring);
1277 } 1286 }
1278 1287
1279 return 0; 1288 return 0;
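The thunderbolt-net hunks stop tearing the connection down from the protocol handler, which may not block, and instead queue the teardown to system_long_wq; the stop path cancels that work first so teardown never races with itself. A sketch of the shape, with illustrative names (the workqueue APIs are real):

    #include <linux/workqueue.h>

    struct example_net {
    	struct work_struct disconnect_work;
    };

    static void example_teardown(struct example_net *net)
    {
    	/* stop rings, free buffers, ... (may sleep) */
    }

    static void example_disconnect_work(struct work_struct *work)
    {
    	struct example_net *net =
    		container_of(work, struct example_net, disconnect_work);

    	example_teardown(net);
    }

    static void example_setup(struct example_net *net)
    {
    	INIT_WORK(&net->disconnect_work, example_disconnect_work);
    }

    static void example_on_logout(struct example_net *net)
    {
    	/* handler context: defer the heavy lifting */
    	queue_work(system_long_wq, &net->disconnect_work);
    }

    static void example_stop(struct example_net *net)
    {
    	cancel_work_sync(&net->disconnect_work);	/* kill any race */
    	example_teardown(net);
    }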
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 81e6cc951e7f..d531954512c7 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -78,6 +78,7 @@
78#include <linux/mutex.h> 78#include <linux/mutex.h>
79 79
80#include <linux/uaccess.h> 80#include <linux/uaccess.h>
81#include <linux/proc_fs.h>
81 82
82/* Uncomment to enable debugging */ 83/* Uncomment to enable debugging */
83/* #define TUN_DEBUG 1 */ 84/* #define TUN_DEBUG 1 */
@@ -1489,27 +1490,23 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
1489 skb->truesize += skb->data_len; 1490 skb->truesize += skb->data_len;
1490 1491
1491 for (i = 1; i < it->nr_segs; i++) { 1492 for (i = 1; i < it->nr_segs; i++) {
1493 struct page_frag *pfrag = &current->task_frag;
1492 size_t fragsz = it->iov[i].iov_len; 1494 size_t fragsz = it->iov[i].iov_len;
1493 unsigned long offset;
1494 struct page *page;
1495 void *data;
1496 1495
1497 if (fragsz == 0 || fragsz > PAGE_SIZE) { 1496 if (fragsz == 0 || fragsz > PAGE_SIZE) {
1498 err = -EINVAL; 1497 err = -EINVAL;
1499 goto free; 1498 goto free;
1500 } 1499 }
1501 1500
1502 local_bh_disable(); 1501 if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) {
1503 data = napi_alloc_frag(fragsz);
1504 local_bh_enable();
1505 if (!data) {
1506 err = -ENOMEM; 1502 err = -ENOMEM;
1507 goto free; 1503 goto free;
1508 } 1504 }
1509 1505
1510 page = virt_to_head_page(data); 1506 skb_fill_page_desc(skb, i - 1, pfrag->page,
1511 offset = data - page_address(page); 1507 pfrag->offset, fragsz);
1512 skb_fill_page_desc(skb, i - 1, page, offset, fragsz); 1508 page_ref_inc(pfrag->page);
1509 pfrag->offset += fragsz;
1513 } 1510 }
1514 1511
1515 return skb; 1512 return skb;
@@ -2290,11 +2287,67 @@ static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
2290 return -EINVAL; 2287 return -EINVAL;
2291} 2288}
2292 2289
2290static size_t tun_get_size(const struct net_device *dev)
2291{
2292 BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t));
2293 BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t));
2294
2295 return nla_total_size(sizeof(uid_t)) + /* OWNER */
2296 nla_total_size(sizeof(gid_t)) + /* GROUP */
2297 nla_total_size(sizeof(u8)) + /* TYPE */
2298 nla_total_size(sizeof(u8)) + /* PI */
2299 nla_total_size(sizeof(u8)) + /* VNET_HDR */
2300 nla_total_size(sizeof(u8)) + /* PERSIST */
2301 nla_total_size(sizeof(u8)) + /* MULTI_QUEUE */
2302 nla_total_size(sizeof(u32)) + /* NUM_QUEUES */
2303 nla_total_size(sizeof(u32)) + /* NUM_DISABLED_QUEUES */
2304 0;
2305}
2306
2307static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev)
2308{
2309 struct tun_struct *tun = netdev_priv(dev);
2310
2311 if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK))
2312 goto nla_put_failure;
2313 if (uid_valid(tun->owner) &&
2314 nla_put_u32(skb, IFLA_TUN_OWNER,
2315 from_kuid_munged(current_user_ns(), tun->owner)))
2316 goto nla_put_failure;
2317 if (gid_valid(tun->group) &&
2318 nla_put_u32(skb, IFLA_TUN_GROUP,
2319 from_kgid_munged(current_user_ns(), tun->group)))
2320 goto nla_put_failure;
2321 if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI)))
2322 goto nla_put_failure;
2323 if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR)))
2324 goto nla_put_failure;
2325 if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST)))
2326 goto nla_put_failure;
2327 if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE,
2328 !!(tun->flags & IFF_MULTI_QUEUE)))
2329 goto nla_put_failure;
2330 if (tun->flags & IFF_MULTI_QUEUE) {
2331 if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues))
2332 goto nla_put_failure;
2333 if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES,
2334 tun->numdisabled))
2335 goto nla_put_failure;
2336 }
2337
2338 return 0;
2339
2340nla_put_failure:
2341 return -EMSGSIZE;
2342}
2343
2293static struct rtnl_link_ops tun_link_ops __read_mostly = { 2344static struct rtnl_link_ops tun_link_ops __read_mostly = {
2294 .kind = DRV_NAME, 2345 .kind = DRV_NAME,
2295 .priv_size = sizeof(struct tun_struct), 2346 .priv_size = sizeof(struct tun_struct),
2296 .setup = tun_setup, 2347 .setup = tun_setup,
2297 .validate = tun_validate, 2348 .validate = tun_validate,
2349 .get_size = tun_get_size,
2350 .fill_info = tun_fill_info,
2298}; 2351};
2299 2352
2300static void tun_sock_write_space(struct sock *sk) 2353static void tun_sock_write_space(struct sock *sk)
@@ -2793,6 +2846,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
2793 struct tun_struct *tun; 2846 struct tun_struct *tun;
2794 void __user* argp = (void __user*)arg; 2847 void __user* argp = (void __user*)arg;
2795 struct ifreq ifr; 2848 struct ifreq ifr;
2849 struct net *net;
2796 kuid_t owner; 2850 kuid_t owner;
2797 kgid_t group; 2851 kgid_t group;
2798 int sndbuf; 2852 int sndbuf;
@@ -2801,7 +2855,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
2801 int le; 2855 int le;
2802 int ret; 2856 int ret;
2803 2857
2804 if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) { 2858 if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
2859 (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
2805 if (copy_from_user(&ifr, argp, ifreq_len)) 2860 if (copy_from_user(&ifr, argp, ifreq_len))
2806 return -EFAULT; 2861 return -EFAULT;
2807 } else { 2862 } else {
@@ -2821,6 +2876,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
2821 rtnl_lock(); 2876 rtnl_lock();
2822 2877
2823 tun = tun_get(tfile); 2878 tun = tun_get(tfile);
2879 net = sock_net(&tfile->sk);
2824 if (cmd == TUNSETIFF) { 2880 if (cmd == TUNSETIFF) {
2825 ret = -EEXIST; 2881 ret = -EEXIST;
2826 if (tun) 2882 if (tun)
@@ -2828,7 +2884,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
2828 2884
2829 ifr.ifr_name[IFNAMSIZ-1] = '\0'; 2885 ifr.ifr_name[IFNAMSIZ-1] = '\0';
2830 2886
2831 ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr); 2887 ret = tun_set_iff(net, file, &ifr);
2832 2888
2833 if (ret) 2889 if (ret)
2834 goto unlock; 2890 goto unlock;
@@ -2850,6 +2906,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
2850 tfile->ifindex = ifindex; 2906 tfile->ifindex = ifindex;
2851 goto unlock; 2907 goto unlock;
2852 } 2908 }
2909 if (cmd == SIOCGSKNS) {
2910 ret = -EPERM;
2911 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2912 goto unlock;
2913
2914 ret = open_related_ns(&net->ns, get_net_ns);
2915 goto unlock;
2916 }
2853 2917
2854 ret = -EBADFD; 2918 ret = -EBADFD;
2855 if (!tun) 2919 if (!tun)
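Among the tun changes, the frag allocator in tun_napi_alloc_frags() switches from napi_alloc_frag() (and its local_bh_disable() dance) to the per-task page_frag, the idiomatic refill mechanism in process context. A sketch of attaching one fragment, using the real skb helpers inside an illustrative function:

    #include <linux/mm.h>
    #include <linux/sched.h>
    #include <linux/skbuff.h>

    static int example_add_frag(struct sk_buff *skb, int i, size_t fragsz)
    {
    	struct page_frag *pfrag = &current->task_frag;

    	if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL))
    		return -ENOMEM;

    	skb_fill_page_desc(skb, i, pfrag->page, pfrag->offset, fragsz);
    	page_ref_inc(pfrag->page);	/* skb takes its own reference */
    	pfrag->offset += fragsz;

    	return 0;
    }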
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 139c61c8244a..951a4b42cb29 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -736,7 +736,6 @@ static int vrf_rtable_create(struct net_device *dev)
736 return -ENOMEM; 736 return -ENOMEM;
737 737
738 rth->dst.output = vrf_output; 738 rth->dst.output = vrf_output;
739 rth->rt_table_id = vrf->tb_id;
740 739
741 rcu_assign_pointer(vrf->rth, rth); 740 rcu_assign_pointer(vrf->rth, rth);
742 741
@@ -1175,6 +1174,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
1175 memset(frh, 0, sizeof(*frh)); 1174 memset(frh, 0, sizeof(*frh));
1176 frh->family = family; 1175 frh->family = family;
1177 frh->action = FR_ACT_TO_TBL; 1176 frh->action = FR_ACT_TO_TBL;
1177 frh->proto = RTPROT_KERNEL;
1178 1178
1179 if (nla_put_u8(skb, FRA_L3MDEV, 1)) 1179 if (nla_put_u8(skb, FRA_L3MDEV, 1))
1180 goto nla_put_failure; 1180 goto nla_put_failure;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f431c32774f3..0fe7ea35c221 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -120,8 +120,12 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
120 int ret; 120 int ret;
121 121
122 ret = nvme_reset_ctrl(ctrl); 122 ret = nvme_reset_ctrl(ctrl);
123 if (!ret) 123 if (!ret) {
124 flush_work(&ctrl->reset_work); 124 flush_work(&ctrl->reset_work);
125 if (ctrl->state != NVME_CTRL_LIVE)
126 ret = -ENETRESET;
127 }
128
125 return ret; 129 return ret;
126} 130}
127EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync); 131EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
@@ -265,7 +269,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
265 switch (new_state) { 269 switch (new_state) {
266 case NVME_CTRL_ADMIN_ONLY: 270 case NVME_CTRL_ADMIN_ONLY:
267 switch (old_state) { 271 switch (old_state) {
268 case NVME_CTRL_RECONNECTING: 272 case NVME_CTRL_CONNECTING:
269 changed = true; 273 changed = true;
270 /* FALLTHRU */ 274 /* FALLTHRU */
271 default: 275 default:
@@ -276,7 +280,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
276 switch (old_state) { 280 switch (old_state) {
277 case NVME_CTRL_NEW: 281 case NVME_CTRL_NEW:
278 case NVME_CTRL_RESETTING: 282 case NVME_CTRL_RESETTING:
279 case NVME_CTRL_RECONNECTING: 283 case NVME_CTRL_CONNECTING:
280 changed = true; 284 changed = true;
281 /* FALLTHRU */ 285 /* FALLTHRU */
282 default: 286 default:
@@ -294,9 +298,9 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
294 break; 298 break;
295 } 299 }
296 break; 300 break;
297 case NVME_CTRL_RECONNECTING: 301 case NVME_CTRL_CONNECTING:
298 switch (old_state) { 302 switch (old_state) {
299 case NVME_CTRL_LIVE: 303 case NVME_CTRL_NEW:
300 case NVME_CTRL_RESETTING: 304 case NVME_CTRL_RESETTING:
301 changed = true; 305 changed = true;
302 /* FALLTHRU */ 306 /* FALLTHRU */
@@ -309,7 +313,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
309 case NVME_CTRL_LIVE: 313 case NVME_CTRL_LIVE:
310 case NVME_CTRL_ADMIN_ONLY: 314 case NVME_CTRL_ADMIN_ONLY:
311 case NVME_CTRL_RESETTING: 315 case NVME_CTRL_RESETTING:
312 case NVME_CTRL_RECONNECTING: 316 case NVME_CTRL_CONNECTING:
313 changed = true; 317 changed = true;
314 /* FALLTHRU */ 318 /* FALLTHRU */
315 default: 319 default:
@@ -518,9 +522,11 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
518 u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector); 522 u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
519 u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; 523 u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
520 524
521 range[n].cattr = cpu_to_le32(0); 525 if (n < segments) {
522 range[n].nlb = cpu_to_le32(nlb); 526 range[n].cattr = cpu_to_le32(0);
523 range[n].slba = cpu_to_le64(slba); 527 range[n].nlb = cpu_to_le32(nlb);
528 range[n].slba = cpu_to_le64(slba);
529 }
524 n++; 530 n++;
525 } 531 }
526 532
@@ -794,13 +800,9 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
794 800
795static int nvme_keep_alive(struct nvme_ctrl *ctrl) 801static int nvme_keep_alive(struct nvme_ctrl *ctrl)
796{ 802{
797 struct nvme_command c;
798 struct request *rq; 803 struct request *rq;
799 804
800 memset(&c, 0, sizeof(c)); 805 rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED,
801 c.common.opcode = nvme_admin_keep_alive;
802
803 rq = nvme_alloc_request(ctrl->admin_q, &c, BLK_MQ_REQ_RESERVED,
804 NVME_QID_ANY); 806 NVME_QID_ANY);
805 if (IS_ERR(rq)) 807 if (IS_ERR(rq))
806 return PTR_ERR(rq); 808 return PTR_ERR(rq);
@@ -832,6 +834,8 @@ void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
832 return; 834 return;
833 835
834 INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work); 836 INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
837 memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
838 ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
835 schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); 839 schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
836} 840}
837EXPORT_SYMBOL_GPL(nvme_start_keep_alive); 841EXPORT_SYMBOL_GPL(nvme_start_keep_alive);
@@ -1117,14 +1121,19 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
1117 1121
1118static void nvme_update_formats(struct nvme_ctrl *ctrl) 1122static void nvme_update_formats(struct nvme_ctrl *ctrl)
1119{ 1123{
1120 struct nvme_ns *ns; 1124 struct nvme_ns *ns, *next;
1125 LIST_HEAD(rm_list);
1121 1126
1122 mutex_lock(&ctrl->namespaces_mutex); 1127 mutex_lock(&ctrl->namespaces_mutex);
1123 list_for_each_entry(ns, &ctrl->namespaces, list) { 1128 list_for_each_entry(ns, &ctrl->namespaces, list) {
1124 if (ns->disk && nvme_revalidate_disk(ns->disk)) 1129 if (ns->disk && nvme_revalidate_disk(ns->disk)) {
1125 nvme_ns_remove(ns); 1130 list_move_tail(&ns->list, &rm_list);
1131 }
1126 } 1132 }
1127 mutex_unlock(&ctrl->namespaces_mutex); 1133 mutex_unlock(&ctrl->namespaces_mutex);
1134
1135 list_for_each_entry_safe(ns, next, &rm_list, list)
1136 nvme_ns_remove(ns);
1128} 1137}
1129 1138
1130static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) 1139static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -2687,7 +2696,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
2687 [NVME_CTRL_LIVE] = "live", 2696 [NVME_CTRL_LIVE] = "live",
2688 [NVME_CTRL_ADMIN_ONLY] = "only-admin", 2697 [NVME_CTRL_ADMIN_ONLY] = "only-admin",
2689 [NVME_CTRL_RESETTING] = "resetting", 2698 [NVME_CTRL_RESETTING] = "resetting",
2690 [NVME_CTRL_RECONNECTING]= "reconnecting", 2699 [NVME_CTRL_CONNECTING] = "connecting",
2691 [NVME_CTRL_DELETING] = "deleting", 2700 [NVME_CTRL_DELETING] = "deleting",
2692 [NVME_CTRL_DEAD] = "dead", 2701 [NVME_CTRL_DEAD] = "dead",
2693 }; 2702 };
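nvme_update_formats() above illustrates a common locking pattern: collect the namespaces to drop onto a private list while holding namespaces_mutex, then remove them after the mutex is released, since nvme_ns_remove() can sleep and re-take locks. A sketch with illustrative types; the collection walk uses the _safe iterator, since moving the current entry invalidates a plain list_for_each_entry cursor:

    #include <linux/list.h>
    #include <linux/mutex.h>
    #include <linux/types.h>

    struct example_ns {
    	struct list_head list;
    };

    bool should_remove(struct example_ns *ns);	/* illustrative */
    void example_remove(struct example_ns *ns);	/* may sleep */

    static void example_prune(struct list_head *all, struct mutex *lock)
    {
    	struct example_ns *ns, *next;
    	LIST_HEAD(rm_list);

    	mutex_lock(lock);
    	list_for_each_entry_safe(ns, next, all, list)
    		if (should_remove(ns))
    			list_move_tail(&ns->list, &rm_list);
    	mutex_unlock(lock);

    	/* teardown happens without the lock held */
    	list_for_each_entry_safe(ns, next, &rm_list, list)
    		example_remove(ns);
    }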
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 25b19f722f5b..a3145d90c1d2 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -171,13 +171,14 @@ static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl,
171 cmd->common.opcode != nvme_fabrics_command || 171 cmd->common.opcode != nvme_fabrics_command ||
172 cmd->fabrics.fctype != nvme_fabrics_type_connect) { 172 cmd->fabrics.fctype != nvme_fabrics_type_connect) {
173 /* 173 /*
174 * Reconnecting state means transport disruption, which can take 174 * Connecting state means transport disruption or initial
175 * a long time and even might fail permanently, fail fast to 175 * establishment, which can take a long time and even might
176 * give upper layers a chance to failover. 176 * fail permanently, fail fast to give upper layers a chance
177 * to failover.
177 * Deleting state means that the ctrl will never accept commands 178 * Deleting state means that the ctrl will never accept commands
178 * again, fail it permanently. 179 * again, fail it permanently.
179 */ 180 */
180 if (ctrl->state == NVME_CTRL_RECONNECTING || 181 if (ctrl->state == NVME_CTRL_CONNECTING ||
181 ctrl->state == NVME_CTRL_DELETING) { 182 ctrl->state == NVME_CTRL_DELETING) {
182 nvme_req(rq)->status = NVME_SC_ABORT_REQ; 183 nvme_req(rq)->status = NVME_SC_ABORT_REQ;
183 return BLK_STS_IOERR; 184 return BLK_STS_IOERR;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index b856d7c919d2..7f51f8414b97 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -55,9 +55,7 @@ struct nvme_fc_queue {
 
 enum nvme_fcop_flags {
 	FCOP_FLAGS_TERMIO	= (1 << 0),
-	FCOP_FLAGS_RELEASED	= (1 << 1),
-	FCOP_FLAGS_COMPLETE	= (1 << 2),
-	FCOP_FLAGS_AEN		= (1 << 3),
+	FCOP_FLAGS_AEN		= (1 << 1),
 };
 
 struct nvmefc_ls_req_op {
@@ -532,7 +530,7 @@ nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
 {
 	switch (ctrl->ctrl.state) {
 	case NVME_CTRL_NEW:
-	case NVME_CTRL_RECONNECTING:
+	case NVME_CTRL_CONNECTING:
 		/*
 		 * As all reconnects were suppressed, schedule a
 		 * connect.
@@ -777,7 +775,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
 		}
 		break;
 
-	case NVME_CTRL_RECONNECTING:
+	case NVME_CTRL_CONNECTING:
 		/*
 		 * The association has already been terminated and the
 		 * controller is attempting reconnects. No need to do anything
@@ -1470,7 +1468,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 
 /* *********************** NVME Ctrl Routines **************************** */
 
-static void __nvme_fc_final_op_cleanup(struct request *rq);
 static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
 
 static int
@@ -1512,13 +1509,19 @@ nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq,
 static int
 __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
 {
-	int state;
+	unsigned long flags;
+	int opstate;
+
+	spin_lock_irqsave(&ctrl->lock, flags);
+	opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
+	if (opstate != FCPOP_STATE_ACTIVE)
+		atomic_set(&op->state, opstate);
+	else if (ctrl->flags & FCCTRL_TERMIO)
+		ctrl->iocnt++;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
 
-	state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
-	if (state != FCPOP_STATE_ACTIVE) {
-		atomic_set(&op->state, state);
+	if (opstate != FCPOP_STATE_ACTIVE)
 		return -ECANCELED;
-	}
 
 	ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
 					&ctrl->rport->remoteport,
@@ -1532,60 +1535,26 @@ static void
 nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
 {
 	struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
-	unsigned long flags;
-	int i, ret;
-
-	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
-		if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
-			continue;
-
-		spin_lock_irqsave(&ctrl->lock, flags);
-		if (ctrl->flags & FCCTRL_TERMIO) {
-			ctrl->iocnt++;
-			aen_op->flags |= FCOP_FLAGS_TERMIO;
-		}
-		spin_unlock_irqrestore(&ctrl->lock, flags);
-
-		ret = __nvme_fc_abort_op(ctrl, aen_op);
-		if (ret) {
-			/*
-			 * if __nvme_fc_abort_op failed the io wasn't
-			 * active. Thus this call path is running in
-			 * parallel to the io complete. Treat as non-error.
-			 */
+	int i;
 
-			/* back out the flags/counters */
-			spin_lock_irqsave(&ctrl->lock, flags);
-			if (ctrl->flags & FCCTRL_TERMIO)
-				ctrl->iocnt--;
-			aen_op->flags &= ~FCOP_FLAGS_TERMIO;
-			spin_unlock_irqrestore(&ctrl->lock, flags);
-			return;
-		}
-	}
+	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
+		__nvme_fc_abort_op(ctrl, aen_op);
 }
 
-static inline int
+static inline void
 __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
-		struct nvme_fc_fcp_op *op)
+		struct nvme_fc_fcp_op *op, int opstate)
 {
 	unsigned long flags;
-	bool complete_rq = false;
 
-	spin_lock_irqsave(&ctrl->lock, flags);
-	if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
+	if (opstate == FCPOP_STATE_ABORTED) {
+		spin_lock_irqsave(&ctrl->lock, flags);
 		if (ctrl->flags & FCCTRL_TERMIO) {
 			if (!--ctrl->iocnt)
 				wake_up(&ctrl->ioabort_wait);
 		}
+		spin_unlock_irqrestore(&ctrl->lock, flags);
 	}
-	if (op->flags & FCOP_FLAGS_RELEASED)
-		complete_rq = true;
-	else
-		op->flags |= FCOP_FLAGS_COMPLETE;
-	spin_unlock_irqrestore(&ctrl->lock, flags);
-
-	return complete_rq;
 }
 
 static void
@@ -1601,6 +1570,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
 	union nvme_result result;
 	bool terminate_assoc = true;
+	int opstate;
 
 	/*
 	 * WARNING:
@@ -1639,11 +1609,12 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	 * association to be terminated.
 	 */
 
+	opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
+
 	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
 				sizeof(op->rsp_iu), DMA_FROM_DEVICE);
 
-	if (atomic_read(&op->state) == FCPOP_STATE_ABORTED ||
-			op->flags & FCOP_FLAGS_TERMIO)
+	if (opstate == FCPOP_STATE_ABORTED)
 		status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
 	else if (freq->status)
 		status = cpu_to_le16(NVME_SC_INTERNAL << 1);
@@ -1708,7 +1679,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 done:
 	if (op->flags & FCOP_FLAGS_AEN) {
 		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
-		__nvme_fc_fcpop_chk_teardowns(ctrl, op);
+		__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
 		atomic_set(&op->state, FCPOP_STATE_IDLE);
 		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
 		nvme_fc_ctrl_put(ctrl);
@@ -1722,13 +1693,11 @@ done:
 	if (status &&
 	    (blk_queue_dying(rq->q) ||
 	     ctrl->ctrl.state == NVME_CTRL_NEW ||
-	     ctrl->ctrl.state == NVME_CTRL_RECONNECTING))
+	     ctrl->ctrl.state == NVME_CTRL_CONNECTING))
 		status |= cpu_to_le16(NVME_SC_DNR << 1);
 
-	if (__nvme_fc_fcpop_chk_teardowns(ctrl, op))
-		__nvme_fc_final_op_cleanup(rq);
-	else
-		nvme_end_request(rq, status, result);
+	__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
+	nvme_end_request(rq, status, result);
 
 check_error:
 	if (terminate_assoc)
@@ -2415,46 +2384,16 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg)
 }
 
 static void
-__nvme_fc_final_op_cleanup(struct request *rq)
+nvme_fc_complete_rq(struct request *rq)
 {
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
 	struct nvme_fc_ctrl *ctrl = op->ctrl;
 
 	atomic_set(&op->state, FCPOP_STATE_IDLE);
-	op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
-			FCOP_FLAGS_COMPLETE);
 
 	nvme_fc_unmap_data(ctrl, rq, op);
 	nvme_complete_rq(rq);
 	nvme_fc_ctrl_put(ctrl);
-
-}
-
-static void
-nvme_fc_complete_rq(struct request *rq)
-{
-	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
-	struct nvme_fc_ctrl *ctrl = op->ctrl;
-	unsigned long flags;
-	bool completed = false;
-
-	/*
-	 * the core layer, on controller resets after calling
-	 * nvme_shutdown_ctrl(), calls complete_rq without our
-	 * calling blk_mq_complete_request(), thus there may still
-	 * be live i/o outstanding with the LLDD. Means transport has
-	 * to track complete calls vs fcpio_done calls to know what
-	 * path to take on completes and dones.
-	 */
-	spin_lock_irqsave(&ctrl->lock, flags);
-	if (op->flags & FCOP_FLAGS_COMPLETE)
-		completed = true;
-	else
-		op->flags |= FCOP_FLAGS_RELEASED;
-	spin_unlock_irqrestore(&ctrl->lock, flags);
-
-	if (completed)
-		__nvme_fc_final_op_cleanup(rq);
 }
 
 /*
@@ -2476,35 +2415,11 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
 	struct nvme_ctrl *nctrl = data;
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
-	unsigned long flags;
-	int status;
 
 	if (!blk_mq_request_started(req))
 		return;
 
-	spin_lock_irqsave(&ctrl->lock, flags);
-	if (ctrl->flags & FCCTRL_TERMIO) {
-		ctrl->iocnt++;
-		op->flags |= FCOP_FLAGS_TERMIO;
-	}
-	spin_unlock_irqrestore(&ctrl->lock, flags);
-
-	status = __nvme_fc_abort_op(ctrl, op);
-	if (status) {
-		/*
-		 * if __nvme_fc_abort_op failed the io wasn't
-		 * active. Thus this call path is running in
-		 * parallel to the io complete. Treat as non-error.
-		 */
-
-		/* back out the flags/counters */
-		spin_lock_irqsave(&ctrl->lock, flags);
-		if (ctrl->flags & FCCTRL_TERMIO)
-			ctrl->iocnt--;
-		op->flags &= ~FCOP_FLAGS_TERMIO;
-		spin_unlock_irqrestore(&ctrl->lock, flags);
-		return;
-	}
+	__nvme_fc_abort_op(ctrl, op);
 }
 
 
@@ -2943,7 +2858,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
 	unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
 	bool recon = true;
 
-	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING)
+	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
 		return;
 
 	if (portptr->port_state == FC_OBJSTATE_ONLINE)
@@ -2991,10 +2906,10 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
 	/* will block will waiting for io to terminate */
 	nvme_fc_delete_association(ctrl);
 
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		dev_err(ctrl->ctrl.device,
 			"NVME-FC{%d}: error_recovery: Couldn't change state "
-			"to RECONNECTING\n", ctrl->cnum);
+			"to CONNECTING\n", ctrl->cnum);
 		return;
 	}
 
@@ -3195,7 +3110,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	 * transport errors (frame drop, LS failure) inherently must kill
 	 * the association. The transport is coded so that any command used
 	 * to create the association (prior to a LIVE state transition
-	 * while NEW or RECONNECTING) will fail if it completes in error or
+	 * while NEW or CONNECTING) will fail if it completes in error or
 	 * times out.
 	 *
 	 * As such: as the connect request was mostly likely due to a
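The fc.c rework above replaces the FCOP_FLAGS_TERMIO/RELEASED/COMPLETE bookkeeping with a single atomic exchange on the op state: whichever of the abort and completion paths swaps the state first owns the op, and the loser sees the previous state and backs off. A minimal userspace sketch of that claim pattern, assuming only C11 atomics (the names below are illustrative, not the kernel's):

	/* Model of the atomic_xchg()-based claim in __nvme_fc_abort_op()
	 * and nvme_fc_fcpio_done(): the first swapper owns the op. */
	#include <stdatomic.h>
	#include <stdio.h>

	enum { OP_IDLE, OP_ACTIVE, OP_ABORTED, OP_COMPLETE };

	static _Atomic int op_state = OP_ACTIVE;

	/* Abort path: claim the op by swapping in ABORTED. */
	static int try_abort(void)
	{
		int prev = atomic_exchange(&op_state, OP_ABORTED);

		if (prev != OP_ACTIVE) {
			/* Lost the race: restore the prior state. */
			atomic_store(&op_state, prev);
			return -1;	/* -ECANCELED in the driver */
		}
		return 0;
	}

	/* Completion path: claim the op by swapping in COMPLETE. */
	static int complete_op(void)
	{
		int prev = atomic_exchange(&op_state, OP_COMPLETE);

		/* prev reveals whether an abort won the race first. */
		return prev == OP_ABORTED;
	}

	int main(void)
	{
		printf("abort claimed op: %s\n", try_abort() == 0 ? "yes" : "no");
		printf("completion saw abort: %s\n", complete_op() ? "yes" : "no");
		return 0;
	}

Because the exchange is the single point of arbitration, neither path needs the flag back-out sequences the old code carried.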
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 8e4550fa08f8..0521e4707d1c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -123,7 +123,7 @@ enum nvme_ctrl_state {
 	NVME_CTRL_LIVE,
 	NVME_CTRL_ADMIN_ONLY,    /* Only admin queue live */
 	NVME_CTRL_RESETTING,
-	NVME_CTRL_RECONNECTING,
+	NVME_CTRL_CONNECTING,
 	NVME_CTRL_DELETING,
 	NVME_CTRL_DEAD,
 };
@@ -183,6 +183,7 @@ struct nvme_ctrl {
 	struct work_struct scan_work;
 	struct work_struct async_event_work;
 	struct delayed_work ka_work;
+	struct nvme_command ka_cmd;
 	struct work_struct fw_act_work;
 
 	/* Power saving configuration */
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6fe7af00a1f4..73036d2fbbd5 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1141,7 +1141,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
 	/* If there is a reset/reinit ongoing, we shouldn't reset again. */
 	switch (dev->ctrl.state) {
 	case NVME_CTRL_RESETTING:
-	case NVME_CTRL_RECONNECTING:
+	case NVME_CTRL_CONNECTING:
 		return false;
 	default:
 		break;
@@ -1215,13 +1215,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	 * cancellation error. All outstanding requests are completed on
 	 * shutdown, so we return BLK_EH_HANDLED.
 	 */
-	if (dev->ctrl.state == NVME_CTRL_RESETTING) {
+	switch (dev->ctrl.state) {
+	case NVME_CTRL_CONNECTING:
+	case NVME_CTRL_RESETTING:
 		dev_warn(dev->ctrl.device,
 			 "I/O %d QID %d timeout, disable controller\n",
 			 req->tag, nvmeq->qid);
 		nvme_dev_disable(dev, false);
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		return BLK_EH_HANDLED;
+	default:
+		break;
 	}
 
 	/*
@@ -1364,18 +1368,14 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
 static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 				int qid, int depth)
 {
-	if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
-		unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
-						      dev->ctrl.page_size);
-		nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
-		nvmeq->sq_cmds_io = dev->cmb + offset;
-	} else {
-		nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
-					&nvmeq->sq_dma_addr, GFP_KERNEL);
-		if (!nvmeq->sq_cmds)
-			return -ENOMEM;
-	}
+	/* CMB SQEs will be mapped before creation */
+	if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS))
+		return 0;
 
+	nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+					    &nvmeq->sq_dma_addr, GFP_KERNEL);
+	if (!nvmeq->sq_cmds)
+		return -ENOMEM;
 	return 0;
 }
 
@@ -1449,6 +1449,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	struct nvme_dev *dev = nvmeq->dev;
 	int result;
 
+	if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+		unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
+						      dev->ctrl.page_size);
+		nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
+		nvmeq->sq_cmds_io = dev->cmb + offset;
+	}
+
 	nvmeq->cq_vector = qid - 1;
 	result = adapter_alloc_cq(dev, qid, nvmeq);
 	if (result < 0)
@@ -2288,12 +2295,12 @@ static void nvme_reset_work(struct work_struct *work)
 	nvme_dev_disable(dev, false);
 
 	/*
-	 * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
+	 * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
 	 * initializing procedure here.
 	 */
-	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RECONNECTING)) {
+	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
 		dev_warn(dev->ctrl.device,
-			"failed to mark controller RECONNECTING\n");
+			"failed to mark controller CONNECTING\n");
 		goto out;
 	}
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2bc059f7d73c..3a51ed50eff2 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -887,7 +887,7 @@ free_ctrl:
 static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
 {
 	/* If we are resetting/deleting then do nothing */
-	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
+	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
 		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
 			ctrl->ctrl.state == NVME_CTRL_LIVE);
 		return;
@@ -973,7 +973,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_start_queues(&ctrl->ctrl);
 
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure should never happen */
 		WARN_ON_ONCE(1);
 		return;
@@ -1756,7 +1756,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_rdma_shutdown_ctrl(ctrl, false);
 
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure should never happen */
 		WARN_ON_ONCE(1);
 		return;
@@ -1784,11 +1784,8 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	return;
 
 out_fail:
-	dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
-	nvme_remove_namespaces(&ctrl->ctrl);
-	nvme_rdma_shutdown_ctrl(ctrl, true);
-	nvme_uninit_ctrl(&ctrl->ctrl);
-	nvme_put_ctrl(&ctrl->ctrl);
+	++ctrl->ctrl.nr_reconnects;
+	nvme_rdma_reconnect_or_remove(ctrl);
 }
 
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -1942,6 +1939,9 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	if (!ctrl->queues)
 		goto out_uninit_ctrl;
 
+	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
+	WARN_ON_ONCE(!changed);
+
 	ret = nvme_rdma_configure_admin_queue(ctrl, true);
 	if (ret)
 		goto out_kfree_queues;
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 0a4372a016f2..28bbdff4a88b 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -105,10 +105,13 @@ static void nvmet_execute_flush(struct nvmet_req *req)
 static u16 nvmet_discard_range(struct nvmet_ns *ns,
 		struct nvme_dsm_range *range, struct bio **bio)
 {
-	if (__blkdev_issue_discard(ns->bdev,
+	int ret;
+
+	ret = __blkdev_issue_discard(ns->bdev,
 			le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
 			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
-			GFP_KERNEL, 0, bio))
+			GFP_KERNEL, 0, bio);
+	if (ret && ret != -EOPNOTSUPP)
 		return NVME_SC_INTERNAL | NVME_SC_DNR;
 	return 0;
 }
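The nvmet change above maps the return of __blkdev_issue_discard() so that -EOPNOTSUPP (the backing device cannot discard) is reported to the host as success rather than NVME_SC_INTERNAL | NVME_SC_DNR. A small standalone sketch of that mapping, with the NVMe status values written out as assumed constants rather than taken from the kernel headers:

	/* Only a real error becomes INTERNAL|DNR; "not supported"
	 * is treated as a successful no-op. */
	#include <errno.h>
	#include <stdio.h>

	#define NVME_SC_SUCCESS		0x0
	#define NVME_SC_INTERNAL	0x6
	#define NVME_SC_DNR		0x4000

	static unsigned short discard_status(int ret)
	{
		if (ret && ret != -EOPNOTSUPP)
			return NVME_SC_INTERNAL | NVME_SC_DNR;
		return NVME_SC_SUCCESS;
	}

	int main(void)
	{
		printf("0           -> 0x%x\n", discard_status(0));
		printf("-EOPNOTSUPP -> 0x%x\n", discard_status(-EOPNOTSUPP));
		printf("-EIO        -> 0x%x\n", discard_status(-EIO));
		return 0;
	}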
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 36ed84e26d9c..f46828e3b082 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -977,11 +977,11 @@ static int of_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
 	return 0;
 }
 
-static void *
+static const void *
 of_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
 				const struct device *dev)
 {
-	return (void *)of_device_get_match_data(dev);
+	return of_device_get_match_data(dev);
 }
 
 const struct fwnode_operations of_fwnode_ops = {
diff --git a/drivers/opp/cpu.c b/drivers/opp/cpu.c
index 2d87bc1adf38..0c0910709435 100644
--- a/drivers/opp/cpu.c
+++ b/drivers/opp/cpu.c
@@ -55,7 +55,7 @@ int dev_pm_opp_init_cpufreq_table(struct device *dev,
 	if (max_opps <= 0)
 		return max_opps ? max_opps : -ENODATA;
 
-	freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_ATOMIC);
+	freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_KERNEL);
 	if (!freq_table)
 		return -ENOMEM;
 
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index fc734014206f..8b14bd326d4a 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3419,22 +3419,29 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PORT_RIDGE,
 
 static void quirk_chelsio_extend_vpd(struct pci_dev *dev)
 {
-	pci_set_vpd_size(dev, 8192);
-}
-
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x20, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x21, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x22, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x23, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x24, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x25, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x26, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x30, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x31, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x32, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x35, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x36, quirk_chelsio_extend_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x37, quirk_chelsio_extend_vpd);
+	int chip = (dev->device & 0xf000) >> 12;
+	int func = (dev->device & 0x0f00) >> 8;
+	int prod = (dev->device & 0x00ff) >> 0;
+
+	/*
+	 * If this is a T3-based adapter, there's a 1KB VPD area at offset
+	 * 0xc00 which contains the preferred VPD values.  If this is a T4 or
+	 * later based adapter, the special VPD is at offset 0x400 for the
+	 * Physical Functions (the SR-IOV Virtual Functions have no VPD
+	 * Capabilities).  The PCI VPD Access core routines will normally
+	 * compute the size of the VPD by parsing the VPD Data Structure at
+	 * offset 0x000.  This will result in silent failures when attempting
+	 * to accesses these other VPD areas which are beyond those computed
+	 * limits.
+	 */
+	if (chip == 0x0 && prod >= 0x20)
+		pci_set_vpd_size(dev, 8192);
+	else if (chip >= 0x4 && func < 0x8)
+		pci_set_vpd_size(dev, 2048);
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
+			quirk_chelsio_extend_vpd);
 
 #ifdef CONFIG_ACPI
 /*
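The new quirk replaces the per-device fixup list with one PCI_ANY_ID fixup that decodes the Chelsio device ID into nibbles: chip generation in bits 15:12, function in bits 11:8, product in bits 7:0. A standalone sketch of that decode; 0x0030 appears in the old fixup list above, the other sample IDs are hypothetical:

	#include <stdio.h>

	static void decode(unsigned int device)
	{
		int chip = (device & 0xf000) >> 12;
		int func = (device & 0x0f00) >> 8;
		int prod = (device & 0x00ff) >> 0;

		if (chip == 0x0 && prod >= 0x20)	/* T3 part */
			printf("0x%04x: T3, VPD size 8192\n", device);
		else if (chip >= 0x4 && func < 0x8)	/* T4+ PF */
			printf("0x%04x: T%d PF%d, VPD size 2048\n",
			       device, chip, func);
		else
			printf("0x%04x: VPD size left alone\n", device);
	}

	int main(void)
	{
		decode(0x0030);	/* T3 ID from the old fixup list */
		decode(0x4401);	/* hypothetical T4 physical function */
		decode(0x4801);	/* hypothetical func >= 8, untouched */
		return 0;
	}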
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index 2a68f59d2228..c52c6723374b 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -127,24 +127,6 @@ static const struct dmi_system_id dell_device_table[] __initconst = {
 		},
 	},
 	{
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-			DMI_MATCH(DMI_CHASSIS_TYPE, "30"), /*Tablet*/
-		},
-	},
-	{
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-			DMI_MATCH(DMI_CHASSIS_TYPE, "31"), /*Convertible*/
-		},
-	},
-	{
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-			DMI_MATCH(DMI_CHASSIS_TYPE, "32"), /*Detachable*/
-		},
-	},
-	{
 		.ident = "Dell Computer Corporation",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
@@ -1279,7 +1261,7 @@ static int kbd_get_state(struct kbd_state *state)
 	struct calling_interface_buffer buffer;
 	int ret;
 
-	dell_fill_request(&buffer, 0, 0, 0, 0);
+	dell_fill_request(&buffer, 0x1, 0, 0, 0);
 	ret = dell_send_request(&buffer,
 				CLASS_KBD_BACKLIGHT, SELECT_KBD_BACKLIGHT);
 	if (ret)
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 5b6f18b18801..535199c9e6bc 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -113,7 +113,7 @@ MODULE_PARM_DESC(no_bt_rfkill, "No rfkill for bluetooth.");
 /*
  * ACPI Helpers
  */
-#define IDEAPAD_EC_TIMEOUT (100) /* in ms */
+#define IDEAPAD_EC_TIMEOUT (200) /* in ms */
 
 static int read_method_int(acpi_handle handle, const char *method, int *val)
 {
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index daa68acbc900..c0c8945603cb 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -933,7 +933,7 @@ static int wmi_dev_probe(struct device *dev)
 		goto probe_failure;
 	}
 
-	buf = kmalloc(strlen(wdriver->driver.name) + 4, GFP_KERNEL);
+	buf = kmalloc(strlen(wdriver->driver.name) + 5, GFP_KERNEL);
 	if (!buf) {
 		ret = -ENOMEM;
 		goto probe_string_failure;
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index ba2e0856d22c..8f5c1d7f751a 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -1297,6 +1297,9 @@ static int virtio_ccw_cio_notify(struct ccw_device *cdev, int event)
 		vcdev->device_lost = true;
 		rc = NOTIFY_DONE;
 		break;
+	case CIO_OPER:
+		rc = NOTIFY_OK;
+		break;
 	default:
 		rc = NOTIFY_DONE;
 		break;
@@ -1309,6 +1312,27 @@ static struct ccw_device_id virtio_ids[] = {
 	{},
 };
 
+#ifdef CONFIG_PM_SLEEP
+static int virtio_ccw_freeze(struct ccw_device *cdev)
+{
+	struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
+
+	return virtio_device_freeze(&vcdev->vdev);
+}
+
+static int virtio_ccw_restore(struct ccw_device *cdev)
+{
+	struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
+	int ret;
+
+	ret = virtio_ccw_set_transport_rev(vcdev);
+	if (ret)
+		return ret;
+
+	return virtio_device_restore(&vcdev->vdev);
+}
+#endif
+
 static struct ccw_driver virtio_ccw_driver = {
 	.driver = {
 		.owner = THIS_MODULE,
@@ -1321,6 +1345,11 @@ static struct ccw_driver virtio_ccw_driver = {
 	.set_online = virtio_ccw_online,
 	.notify = virtio_ccw_cio_notify,
 	.int_class = IRQIO_VIR,
+#ifdef CONFIG_PM_SLEEP
+	.freeze = virtio_ccw_freeze,
+	.thaw = virtio_ccw_restore,
+	.restore = virtio_ccw_restore,
+#endif
 };
 
 static int __init pure_hex(char **cp, unsigned int *val, int min_digit,
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 6198559abbd8..0ad00dbf912d 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -732,7 +732,7 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
 	struct sockaddr_in6 addr;
-	int rc, len;
+	int rc;
 
 	switch(param) {
 	case ISCSI_PARAM_CONN_PORT:
@@ -745,12 +745,12 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
 		}
 		if (param == ISCSI_PARAM_LOCAL_PORT)
 			rc = kernel_getsockname(tcp_sw_conn->sock,
-						(struct sockaddr *)&addr, &len);
+						(struct sockaddr *)&addr);
 		else
 			rc = kernel_getpeername(tcp_sw_conn->sock,
-						(struct sockaddr *)&addr, &len);
+						(struct sockaddr *)&addr);
 		spin_unlock_bh(&conn->session->frwd_lock);
-		if (rc)
+		if (rc < 0)
 			return rc;
 
 		return iscsi_conn_get_addr_param((struct sockaddr_storage *)
@@ -771,7 +771,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
 	struct iscsi_tcp_conn *tcp_conn;
 	struct iscsi_sw_tcp_conn *tcp_sw_conn;
 	struct sockaddr_in6 addr;
-	int rc, len;
+	int rc;
 
 	switch (param) {
 	case ISCSI_HOST_PARAM_IPADDRESS:
@@ -793,9 +793,9 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
 		}
 
 		rc = kernel_getsockname(tcp_sw_conn->sock,
-					(struct sockaddr *)&addr, &len);
+					(struct sockaddr *)&addr);
 		spin_unlock_bh(&session->frwd_lock);
-		if (rc)
+		if (rc < 0)
 			return rc;
 
 		return iscsi_conn_get_addr_param((struct sockaddr_storage *)
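This hunk and the getname() changes in the files that follow (qmi, ipx, irda, lustre, iscsi-target, vhost) all track one kernel API change: kernel_getsockname()/kernel_getpeername() and proto_ops->getname() drop the int *len out-parameter and instead return the address length (>= 0) on success or a negative errno, so callers switch from `if (rc)` to `if (rc < 0)`. A userspace model of the new calling convention, with a made-up address type standing in for struct sockaddr:

	#include <errno.h>
	#include <stdio.h>
	#include <string.h>

	struct fake_addr { char data[16]; };

	/* New-style getname: returns the length or a negative errno,
	 * no separate length out-parameter. */
	static int getname(struct fake_addr *out, int connected)
	{
		if (!connected)
			return -ENOTCONN;
		memset(out, 0, sizeof(*out));
		return (int)sizeof(*out);
	}

	int main(void)
	{
		struct fake_addr a;
		int rc = getname(&a, 1);

		if (rc < 0)		/* the error check the hunks adopt */
			return 1;
		printf("address length %d\n", rc);  /* was *uaddr_len */
		return 0;
	}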
diff --git a/drivers/soc/qcom/qmi_interface.c b/drivers/soc/qcom/qmi_interface.c
index 877611d5c42b..321982277697 100644
--- a/drivers/soc/qcom/qmi_interface.c
+++ b/drivers/soc/qcom/qmi_interface.c
@@ -586,7 +586,6 @@ static struct socket *qmi_sock_create(struct qmi_handle *qmi,
 					   struct sockaddr_qrtr *sq)
 {
 	struct socket *sock;
-	int sl = sizeof(*sq);
 	int ret;
 
 	ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM,
@@ -594,7 +593,7 @@ static struct socket *qmi_sock_create(struct qmi_handle *qmi,
 	if (ret < 0)
 		return ERR_PTR(ret);
 
-	ret = kernel_getsockname(sock, (struct sockaddr *)sq, &sl);
+	ret = kernel_getsockname(sock, (struct sockaddr *)sq);
 	if (ret < 0) {
 		sock_release(sock);
 		return ERR_PTR(ret);
diff --git a/drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c b/drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c
index 5064d5ddf581..fc2013aade51 100644
--- a/drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c
+++ b/drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c
@@ -73,6 +73,8 @@ static int __init its_fsl_mc_msi_init(void)
 
 	for (np = of_find_matching_node(NULL, its_device_id); np;
 	     np = of_find_matching_node(np, its_device_id)) {
+		if (!of_device_is_available(np))
+			continue;
 		if (!of_property_read_bool(np, "msi-controller"))
 			continue;
 
diff --git a/drivers/staging/ipx/af_ipx.c b/drivers/staging/ipx/af_ipx.c
index d21a9d128d3e..5703dd176787 100644
--- a/drivers/staging/ipx/af_ipx.c
+++ b/drivers/staging/ipx/af_ipx.c
@@ -1577,7 +1577,7 @@ out:
 
 
 static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
-			int *uaddr_len, int peer)
+			int peer)
 {
 	struct ipx_address *addr;
 	struct sockaddr_ipx sipx;
@@ -1585,8 +1585,6 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
 	struct ipx_sock *ipxs = ipx_sk(sk);
 	int rc;
 
-	*uaddr_len = sizeof(struct sockaddr_ipx);
-
 	lock_sock(sk);
 	if (peer) {
 		rc = -ENOTCONN;
@@ -1620,7 +1618,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
 	sipx.sipx_zero	 = 0;
 	memcpy(uaddr, &sipx, sizeof(sipx));
 
-	rc = 0;
+	rc = sizeof(struct sockaddr_ipx);
 out:
 	release_sock(sk);
 	return rc;
diff --git a/drivers/staging/irda/net/af_irda.c b/drivers/staging/irda/net/af_irda.c
index 2f1e9ab3d6d0..c13553a9ee11 100644
--- a/drivers/staging/irda/net/af_irda.c
+++ b/drivers/staging/irda/net/af_irda.c
@@ -697,7 +697,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
  *
  */
 static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
-			int *uaddr_len, int peer)
+			int peer)
 {
 	struct sockaddr_irda saddr;
 	struct sock *sk = sock->sk;
@@ -720,11 +720,9 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
 	pr_debug("%s(), tsap_sel = %#x\n", __func__, saddr.sir_lsap_sel);
 	pr_debug("%s(), addr = %08x\n", __func__, saddr.sir_addr);
 
-	/* uaddr_len come to us uninitialised */
-	*uaddr_len = sizeof (struct sockaddr_irda);
-	memcpy(uaddr, &saddr, *uaddr_len);
+	memcpy(uaddr, &saddr, sizeof (struct sockaddr_irda));
 
-	return 0;
+	return sizeof (struct sockaddr_irda);
 }
 
 /*
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index ce93806eefca..1bee667802b0 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -448,14 +448,13 @@ int
 lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
 {
 	struct sockaddr_in sin;
-	int len = sizeof(sin);
 	int rc;
 
 	if (remote)
-		rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
+		rc = kernel_getpeername(sock, (struct sockaddr *)&sin);
 	else
-		rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
-	if (rc) {
+		rc = kernel_getsockname(sock, (struct sockaddr *)&sin);
+	if (rc < 0) {
 		CERROR("Error %d getting sock %s IP/port\n",
 		       rc, remote ? "peer" : "local");
 		return rc;
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 64c5a57b92e4..99501785cdc1 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -1020,7 +1020,7 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
 	struct socket *new_sock, *sock = np->np_socket;
 	struct sockaddr_in sock_in;
 	struct sockaddr_in6 sock_in6;
-	int rc, err;
+	int rc;
 
 	rc = kernel_accept(sock, &new_sock, 0);
 	if (rc < 0)
@@ -1033,8 +1033,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
 		memset(&sock_in6, 0, sizeof(struct sockaddr_in6));
 
 		rc = conn->sock->ops->getname(conn->sock,
-				(struct sockaddr *)&sock_in6, &err, 1);
-		if (!rc) {
+				(struct sockaddr *)&sock_in6, 1);
+		if (rc >= 0) {
 			if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
 				memcpy(&conn->login_sockaddr, &sock_in6, sizeof(sock_in6));
 			} else {
@@ -1047,8 +1047,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
 		}
 
 		rc = conn->sock->ops->getname(conn->sock,
-				(struct sockaddr *)&sock_in6, &err, 0);
-		if (!rc) {
+				(struct sockaddr *)&sock_in6, 0);
+		if (rc >= 0) {
 			if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
 				memcpy(&conn->local_sockaddr, &sock_in6, sizeof(sock_in6));
 			} else {
@@ -1063,13 +1063,13 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
 		memset(&sock_in, 0, sizeof(struct sockaddr_in));
 
 		rc = conn->sock->ops->getname(conn->sock,
-				(struct sockaddr *)&sock_in, &err, 1);
-		if (!rc)
+				(struct sockaddr *)&sock_in, 1);
+		if (rc >= 0)
 			memcpy(&conn->login_sockaddr, &sock_in, sizeof(sock_in));
 
 		rc = conn->sock->ops->getname(conn->sock,
-				(struct sockaddr *)&sock_in, &err, 0);
-		if (!rc)
+				(struct sockaddr *)&sock_in, 0);
+		if (rc >= 0)
 			memcpy(&conn->local_sockaddr, &sock_in, sizeof(sock_in));
 	}
 
diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index f699abab1787..148f3ee70286 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -19,6 +19,12 @@ config USB_EHCI_BIG_ENDIAN_MMIO
 config USB_EHCI_BIG_ENDIAN_DESC
 	bool
 
+config USB_UHCI_BIG_ENDIAN_MMIO
+	bool
+
+config USB_UHCI_BIG_ENDIAN_DESC
+	bool
+
 menuconfig USB_SUPPORT
 	bool "USB support"
 	depends on HAS_IOMEM
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 6150bed7cfa8..4fcfb3084b36 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -633,14 +633,6 @@ config USB_UHCI_ASPEED
 	bool
 	default y if ARCH_ASPEED
 
-config USB_UHCI_BIG_ENDIAN_MMIO
-	bool
-	default y if SPARC_LEON
-
-config USB_UHCI_BIG_ENDIAN_DESC
-	bool
-	default y if SPARC_LEON
-
 config USB_FHCI_HCD
 	tristate "Freescale QE USB Host Controller support"
 	depends on OF_GPIO && QE_GPIO && QUICC_ENGINE
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 610cba276d47..b5fb56b822fd 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1038,7 +1038,7 @@ static struct socket *get_raw_socket(int fd)
 		struct sockaddr_ll sa;
 		char  buf[MAX_ADDR_LEN];
 	} uaddr;
-	int uaddr_len = sizeof uaddr, r;
+	int r;
 	struct socket *sock = sockfd_lookup(fd, &r);
 
 	if (!sock)
@@ -1050,9 +1050,8 @@ static struct socket *get_raw_socket(int fd)
 		goto err;
 	}
 
-	r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa,
-			       &uaddr_len, 0);
-	if (r)
+	r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0);
+	if (r < 0)
 		goto err;
 
 	if (uaddr.sa.sll_family != AF_PACKET) {
diff --git a/drivers/video/fbdev/geode/video_gx.c b/drivers/video/fbdev/geode/video_gx.c
index 6082f653c68a..67773e8bbb95 100644
--- a/drivers/video/fbdev/geode/video_gx.c
+++ b/drivers/video/fbdev/geode/video_gx.c
@@ -127,7 +127,7 @@ void gx_set_dclk_frequency(struct fb_info *info)
 	int timeout = 1000;
 
 	/* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */
-	if (cpu_data(0).x86_mask == 1) {
+	if (cpu_data(0).x86_stepping == 1) {
 		pll_table = gx_pll_table_14MHz;
 		pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz);
 	} else {
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 753d9cb437d0..aedbee3b2838 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -60,6 +60,7 @@ struct sock_mapping {
60 bool active_socket; 60 bool active_socket;
61 struct list_head list; 61 struct list_head list;
62 struct socket *sock; 62 struct socket *sock;
63 atomic_t refcount;
63 union { 64 union {
64 struct { 65 struct {
65 int irq; 66 int irq;
@@ -93,6 +94,32 @@ struct sock_mapping {
93 }; 94 };
94}; 95};
95 96
97static inline struct sock_mapping *pvcalls_enter_sock(struct socket *sock)
98{
99 struct sock_mapping *map;
100
101 if (!pvcalls_front_dev ||
102 dev_get_drvdata(&pvcalls_front_dev->dev) == NULL)
103 return ERR_PTR(-ENOTCONN);
104
105 map = (struct sock_mapping *)sock->sk->sk_send_head;
106 if (map == NULL)
107 return ERR_PTR(-ENOTSOCK);
108
109 pvcalls_enter();
110 atomic_inc(&map->refcount);
111 return map;
112}
113
114static inline void pvcalls_exit_sock(struct socket *sock)
115{
116 struct sock_mapping *map;
117
118 map = (struct sock_mapping *)sock->sk->sk_send_head;
119 atomic_dec(&map->refcount);
120 pvcalls_exit();
121}
122
96static inline int get_request(struct pvcalls_bedata *bedata, int *req_id) 123static inline int get_request(struct pvcalls_bedata *bedata, int *req_id)
97{ 124{
98 *req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1); 125 *req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
@@ -369,31 +396,23 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
369 if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM) 396 if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
370 return -EOPNOTSUPP; 397 return -EOPNOTSUPP;
371 398
372 pvcalls_enter(); 399 map = pvcalls_enter_sock(sock);
373 if (!pvcalls_front_dev) { 400 if (IS_ERR(map))
374 pvcalls_exit(); 401 return PTR_ERR(map);
375 return -ENOTCONN;
376 }
377 402
378 bedata = dev_get_drvdata(&pvcalls_front_dev->dev); 403 bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
379 404
380 map = (struct sock_mapping *)sock->sk->sk_send_head;
381 if (!map) {
382 pvcalls_exit();
383 return -ENOTSOCK;
384 }
385
386 spin_lock(&bedata->socket_lock); 405 spin_lock(&bedata->socket_lock);
387 ret = get_request(bedata, &req_id); 406 ret = get_request(bedata, &req_id);
388 if (ret < 0) { 407 if (ret < 0) {
389 spin_unlock(&bedata->socket_lock); 408 spin_unlock(&bedata->socket_lock);
390 pvcalls_exit(); 409 pvcalls_exit_sock(sock);
391 return ret; 410 return ret;
392 } 411 }
393 ret = create_active(map, &evtchn); 412 ret = create_active(map, &evtchn);
394 if (ret < 0) { 413 if (ret < 0) {
395 spin_unlock(&bedata->socket_lock); 414 spin_unlock(&bedata->socket_lock);
396 pvcalls_exit(); 415 pvcalls_exit_sock(sock);
397 return ret; 416 return ret;
398 } 417 }
399 418
@@ -423,7 +442,7 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
423 smp_rmb(); 442 smp_rmb();
424 ret = bedata->rsp[req_id].ret; 443 ret = bedata->rsp[req_id].ret;
425 bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID; 444 bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
426 pvcalls_exit(); 445 pvcalls_exit_sock(sock);
427 return ret; 446 return ret;
428} 447}
429 448
@@ -488,23 +507,15 @@ int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg,
488 if (flags & (MSG_CONFIRM|MSG_DONTROUTE|MSG_EOR|MSG_OOB)) 507 if (flags & (MSG_CONFIRM|MSG_DONTROUTE|MSG_EOR|MSG_OOB))
489 return -EOPNOTSUPP; 508 return -EOPNOTSUPP;
490 509
491 pvcalls_enter(); 510 map = pvcalls_enter_sock(sock);
492 if (!pvcalls_front_dev) { 511 if (IS_ERR(map))
493 pvcalls_exit(); 512 return PTR_ERR(map);
494 return -ENOTCONN;
495 }
496 bedata = dev_get_drvdata(&pvcalls_front_dev->dev); 513 bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
497 514
498 map = (struct sock_mapping *) sock->sk->sk_send_head;
499 if (!map) {
500 pvcalls_exit();
501 return -ENOTSOCK;
502 }
503
504 mutex_lock(&map->active.out_mutex); 515 mutex_lock(&map->active.out_mutex);
505 if ((flags & MSG_DONTWAIT) && !pvcalls_front_write_todo(map)) { 516 if ((flags & MSG_DONTWAIT) && !pvcalls_front_write_todo(map)) {
506 mutex_unlock(&map->active.out_mutex); 517 mutex_unlock(&map->active.out_mutex);
507 pvcalls_exit(); 518 pvcalls_exit_sock(sock);
508 return -EAGAIN; 519 return -EAGAIN;
509 } 520 }
510 if (len > INT_MAX) 521 if (len > INT_MAX)
@@ -526,7 +537,7 @@ again:
526 tot_sent = sent; 537 tot_sent = sent;
527 538
528 mutex_unlock(&map->active.out_mutex); 539 mutex_unlock(&map->active.out_mutex);
529 pvcalls_exit(); 540 pvcalls_exit_sock(sock);
530 return tot_sent; 541 return tot_sent;
531} 542}
532 543
@@ -591,19 +602,11 @@ int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
591 if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC)) 602 if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC))
592 return -EOPNOTSUPP; 603 return -EOPNOTSUPP;
593 604
594 pvcalls_enter(); 605 map = pvcalls_enter_sock(sock);
595 if (!pvcalls_front_dev) { 606 if (IS_ERR(map))
596 pvcalls_exit(); 607 return PTR_ERR(map);
597 return -ENOTCONN;
598 }
599 bedata = dev_get_drvdata(&pvcalls_front_dev->dev); 608 bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
600 609
601 map = (struct sock_mapping *) sock->sk->sk_send_head;
602 if (!map) {
603 pvcalls_exit();
604 return -ENOTSOCK;
605 }
606
607 mutex_lock(&map->active.in_mutex); 610 mutex_lock(&map->active.in_mutex);
608 if (len > XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER)) 611 if (len > XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER))
609 len = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER); 612 len = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
@@ -623,7 +626,7 @@ int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
623 ret = 0; 626 ret = 0;
624 627
625 mutex_unlock(&map->active.in_mutex); 628 mutex_unlock(&map->active.in_mutex);
626 pvcalls_exit(); 629 pvcalls_exit_sock(sock);
627 return ret; 630 return ret;
628} 631}
629 632
@@ -637,24 +640,16 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
637 if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM) 640 if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
638 return -EOPNOTSUPP; 641 return -EOPNOTSUPP;
639 642
640 pvcalls_enter(); 643 map = pvcalls_enter_sock(sock);
641 if (!pvcalls_front_dev) { 644 if (IS_ERR(map))
642 pvcalls_exit(); 645 return PTR_ERR(map);
643 return -ENOTCONN;
644 }
645 bedata = dev_get_drvdata(&pvcalls_front_dev->dev); 646 bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
646 647
647 map = (struct sock_mapping *) sock->sk->sk_send_head;
648 if (map == NULL) {
649 pvcalls_exit();
650 return -ENOTSOCK;
651 }
652
653 spin_lock(&bedata->socket_lock); 648 spin_lock(&bedata->socket_lock);
654 ret = get_request(bedata, &req_id); 649 ret = get_request(bedata, &req_id);
655 if (ret < 0) { 650 if (ret < 0) {
656 spin_unlock(&bedata->socket_lock); 651 spin_unlock(&bedata->socket_lock);
657 pvcalls_exit(); 652 pvcalls_exit_sock(sock);
658 return ret; 653 return ret;
659 } 654 }
660 req = RING_GET_REQUEST(&bedata->ring, req_id); 655 req = RING_GET_REQUEST(&bedata->ring, req_id);
@@ -684,7 +679,7 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
684 bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID; 679 bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
685 680
686 map->passive.status = PVCALLS_STATUS_BIND; 681 map->passive.status = PVCALLS_STATUS_BIND;
687 pvcalls_exit(); 682 pvcalls_exit_sock(sock);
688 return 0; 683 return 0;
689} 684}
690 685
@@ -695,21 +690,13 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
695 struct xen_pvcalls_request *req; 690 struct xen_pvcalls_request *req;
696 int notify, req_id, ret; 691 int notify, req_id, ret;
697 692
698 pvcalls_enter(); 693 map = pvcalls_enter_sock(sock);
699 if (!pvcalls_front_dev) { 694 if (IS_ERR(map))
700 pvcalls_exit(); 695 return PTR_ERR(map);
701 return -ENOTCONN;
702 }
703 bedata = dev_get_drvdata(&pvcalls_front_dev->dev); 696 bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
704 697
705 map = (struct sock_mapping *) sock->sk->sk_send_head;
706 if (!map) {
707 pvcalls_exit();
708 return -ENOTSOCK;
709 }
710
711 if (map->passive.status != PVCALLS_STATUS_BIND) { 698 if (map->passive.status != PVCALLS_STATUS_BIND) {
712 pvcalls_exit(); 699 pvcalls_exit_sock(sock);
713 return -EOPNOTSUPP; 700 return -EOPNOTSUPP;
714 } 701 }
715 702
@@ -717,7 +704,7 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
717 ret = get_request(bedata, &req_id); 704 ret = get_request(bedata, &req_id);
718 if (ret < 0) { 705 if (ret < 0) {
719 spin_unlock(&bedata->socket_lock); 706 spin_unlock(&bedata->socket_lock);
720 pvcalls_exit(); 707 pvcalls_exit_sock(sock);
721 return ret; 708 return ret;
722 } 709 }
723 req = RING_GET_REQUEST(&bedata->ring, req_id); 710 req = RING_GET_REQUEST(&bedata->ring, req_id);
@@ -741,7 +728,7 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
741 bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID; 728 bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
742 729
743 map->passive.status = PVCALLS_STATUS_LISTEN; 730 map->passive.status = PVCALLS_STATUS_LISTEN;
744 pvcalls_exit(); 731 pvcalls_exit_sock(sock);
745 return ret; 732 return ret;
746} 733}
747 734
@@ -753,21 +740,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
753 struct xen_pvcalls_request *req; 740 struct xen_pvcalls_request *req;
754 int notify, req_id, ret, evtchn, nonblock; 741 int notify, req_id, ret, evtchn, nonblock;
755 742
756 pvcalls_enter(); 743 map = pvcalls_enter_sock(sock);
757 if (!pvcalls_front_dev) { 744 if (IS_ERR(map))
758 pvcalls_exit(); 745 return PTR_ERR(map);
759 return -ENOTCONN;
760 }
761 bedata = dev_get_drvdata(&pvcalls_front_dev->dev); 746 bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
762 747
763 map = (struct sock_mapping *) sock->sk->sk_send_head;
764 if (!map) {
765 pvcalls_exit();
766 return -ENOTSOCK;
767 }
768
769 if (map->passive.status != PVCALLS_STATUS_LISTEN) { 748 if (map->passive.status != PVCALLS_STATUS_LISTEN) {
770 pvcalls_exit(); 749 pvcalls_exit_sock(sock);
771 return -EINVAL; 750 return -EINVAL;
772 } 751 }
773 752
@@ -785,13 +764,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
785 goto received; 764 goto received;
786 } 765 }
787 if (nonblock) { 766 if (nonblock) {
788 pvcalls_exit(); 767 pvcalls_exit_sock(sock);
789 return -EAGAIN; 768 return -EAGAIN;
790 } 769 }
791 if (wait_event_interruptible(map->passive.inflight_accept_req, 770 if (wait_event_interruptible(map->passive.inflight_accept_req,
792 !test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, 771 !test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
793 (void *)&map->passive.flags))) { 772 (void *)&map->passive.flags))) {
794 pvcalls_exit(); 773 pvcalls_exit_sock(sock);
795 return -EINTR; 774 return -EINTR;
796 } 775 }
797 } 776 }
@@ -802,7 +781,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
802 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, 781 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
803 (void *)&map->passive.flags); 782 (void *)&map->passive.flags);
804 spin_unlock(&bedata->socket_lock); 783 spin_unlock(&bedata->socket_lock);
805 pvcalls_exit(); 784 pvcalls_exit_sock(sock);
806 return ret; 785 return ret;
807 } 786 }
808 map2 = kzalloc(sizeof(*map2), GFP_ATOMIC); 787 map2 = kzalloc(sizeof(*map2), GFP_ATOMIC);
@@ -810,7 +789,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
810 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, 789 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
811 (void *)&map->passive.flags); 790 (void *)&map->passive.flags);
812 spin_unlock(&bedata->socket_lock); 791 spin_unlock(&bedata->socket_lock);
813 pvcalls_exit(); 792 pvcalls_exit_sock(sock);
814 return -ENOMEM; 793 return -ENOMEM;
815 } 794 }
816 ret = create_active(map2, &evtchn); 795 ret = create_active(map2, &evtchn);
@@ -819,7 +798,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
819 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, 798 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
820 (void *)&map->passive.flags); 799 (void *)&map->passive.flags);
821 spin_unlock(&bedata->socket_lock); 800 spin_unlock(&bedata->socket_lock);
822 pvcalls_exit(); 801 pvcalls_exit_sock(sock);
823 return ret; 802 return ret;
824 } 803 }
825 list_add_tail(&map2->list, &bedata->socket_mappings); 804 list_add_tail(&map2->list, &bedata->socket_mappings);
@@ -841,13 +820,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
841 /* We could check if we have received a response before returning. */ 820 /* We could check if we have received a response before returning. */
842 if (nonblock) { 821 if (nonblock) {
843 WRITE_ONCE(map->passive.inflight_req_id, req_id); 822 WRITE_ONCE(map->passive.inflight_req_id, req_id);
844 pvcalls_exit(); 823 pvcalls_exit_sock(sock);
845 return -EAGAIN; 824 return -EAGAIN;
846 } 825 }
847 826
848 if (wait_event_interruptible(bedata->inflight_req, 827 if (wait_event_interruptible(bedata->inflight_req,
849 READ_ONCE(bedata->rsp[req_id].req_id) == req_id)) { 828 READ_ONCE(bedata->rsp[req_id].req_id) == req_id)) {
850 pvcalls_exit(); 829 pvcalls_exit_sock(sock);
851 return -EINTR; 830 return -EINTR;
852 } 831 }
853 /* read req_id, then the content */ 832 /* read req_id, then the content */
@@ -862,7 +841,7 @@ received:
862 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, 841 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
863 (void *)&map->passive.flags); 842 (void *)&map->passive.flags);
864 pvcalls_front_free_map(bedata, map2); 843 pvcalls_front_free_map(bedata, map2);
865 pvcalls_exit(); 844 pvcalls_exit_sock(sock);
866 return -ENOMEM; 845 return -ENOMEM;
867 } 846 }
868 newsock->sk->sk_send_head = (void *)map2; 847 newsock->sk->sk_send_head = (void *)map2;
@@ -874,7 +853,7 @@ received:
874 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags); 853 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags);
875 wake_up(&map->passive.inflight_accept_req); 854 wake_up(&map->passive.inflight_accept_req);
876 855
877 pvcalls_exit(); 856 pvcalls_exit_sock(sock);
878 return ret; 857 return ret;
879} 858}
880 859
@@ -965,23 +944,16 @@ __poll_t pvcalls_front_poll(struct file *file, struct socket *sock,
965 struct sock_mapping *map; 944 struct sock_mapping *map;
966 __poll_t ret; 945 __poll_t ret;
967 946
968 pvcalls_enter(); 947 map = pvcalls_enter_sock(sock);
969 if (!pvcalls_front_dev) { 948 if (IS_ERR(map))
970 pvcalls_exit();
971 return EPOLLNVAL; 949 return EPOLLNVAL;
972 }
973 bedata = dev_get_drvdata(&pvcalls_front_dev->dev); 950 bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
974 951
975 map = (struct sock_mapping *) sock->sk->sk_send_head;
976 if (!map) {
977 pvcalls_exit();
978 return EPOLLNVAL;
979 }
980 if (map->active_socket) 952 if (map->active_socket)
981 ret = pvcalls_front_poll_active(file, bedata, map, wait); 953 ret = pvcalls_front_poll_active(file, bedata, map, wait);
982 else 954 else
983 ret = pvcalls_front_poll_passive(file, bedata, map, wait); 955 ret = pvcalls_front_poll_passive(file, bedata, map, wait);
984 pvcalls_exit(); 956 pvcalls_exit_sock(sock);
985 return ret; 957 return ret;
986} 958}
987 959
@@ -995,25 +967,20 @@ int pvcalls_front_release(struct socket *sock)
995 if (sock->sk == NULL) 967 if (sock->sk == NULL)
996 return 0; 968 return 0;
997 969
998 pvcalls_enter(); 970 map = pvcalls_enter_sock(sock);
999 if (!pvcalls_front_dev) { 971 if (IS_ERR(map)) {
1000 pvcalls_exit(); 972 if (PTR_ERR(map) == -ENOTCONN)
1001 return -EIO; 973 return -EIO;
974 else
975 return 0;
1002 } 976 }
1003
1004 bedata = dev_get_drvdata(&pvcalls_front_dev->dev); 977 bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
1005 978
1006 map = (struct sock_mapping *) sock->sk->sk_send_head;
1007 if (map == NULL) {
1008 pvcalls_exit();
1009 return 0;
1010 }
1011
1012 spin_lock(&bedata->socket_lock); 979 spin_lock(&bedata->socket_lock);
1013 ret = get_request(bedata, &req_id); 980 ret = get_request(bedata, &req_id);
1014 if (ret < 0) { 981 if (ret < 0) {
1015 spin_unlock(&bedata->socket_lock); 982 spin_unlock(&bedata->socket_lock);
1016 pvcalls_exit(); 983 pvcalls_exit_sock(sock);
1017 return ret; 984 return ret;
1018 } 985 }
1019 sock->sk->sk_send_head = NULL; 986 sock->sk->sk_send_head = NULL;
@@ -1043,14 +1010,20 @@ int pvcalls_front_release(struct socket *sock)
1043 /* 1010 /*
1044 * We need to make sure that sendmsg/recvmsg on this socket have 1011 * We need to make sure that sendmsg/recvmsg on this socket have
1045 * not started before we've cleared sk_send_head here. The 1012 * not started before we've cleared sk_send_head here. The
1046 * easiest (though not optimal) way to guarantee this is to see 1013 * easiest way to guarantee this is to see that no pvcalls
1047 * that no pvcall (other than us) is in progress. 1014 * (other than us) is in progress on this socket.
1048 */ 1015 */
1049 while (atomic_read(&pvcalls_refcount) > 1) 1016 while (atomic_read(&map->refcount) > 1)
1050 cpu_relax(); 1017 cpu_relax();
1051 1018
1052 pvcalls_front_free_map(bedata, map); 1019 pvcalls_front_free_map(bedata, map);
1053 } else { 1020 } else {
1021 wake_up(&bedata->inflight_req);
1022 wake_up(&map->passive.inflight_accept_req);
1023
1024 while (atomic_read(&map->refcount) > 1)
1025 cpu_relax();
1026
1054 spin_lock(&bedata->socket_lock); 1027 spin_lock(&bedata->socket_lock);
1055 list_del(&map->list); 1028 list_del(&map->list);
1056 spin_unlock(&bedata->socket_lock); 1029 spin_unlock(&bedata->socket_lock);
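The pvcalls hunks above replace the global pvcalls_refcount busy-wait with a per-socket map->refcount, taken in pvcalls_enter_sock() and dropped in pvcalls_exit_sock(), so releasing one socket only has to drain users of that socket rather than all pvcalls traffic. Those helpers are defined elsewhere in pvcalls-front.c and are not part of the hunks shown, so the following is only a sketch of the assumed pattern:

    #include <linux/err.h>

    /* Sketch, not the upstream definition: enter the per-socket section,
     * mapping "no frontend" and "no mapping" to distinct error pointers so
     * callers like pvcalls_front_release() can tell the two cases apart.
     */
    static inline struct sock_mapping *pvcalls_enter_sock(struct socket *sock)
    {
            struct sock_mapping *map;

            if (!pvcalls_front_dev ||
                dev_get_drvdata(&pvcalls_front_dev->dev) == NULL)
                    return ERR_PTR(-ENOTCONN);

            map = (struct sock_mapping *)sock->sk->sk_send_head;
            if (map == NULL)
                    return ERR_PTR(-ENOTSOCK);

            pvcalls_enter();
            atomic_inc(&map->refcount);     /* what the release path waits on */
            return map;
    }

    static inline void pvcalls_exit_sock(struct socket *sock)
    {
            struct sock_mapping *map;

            map = (struct sock_mapping *)sock->sk->sk_send_head;
            atomic_dec(&map->refcount);
            pvcalls_exit();
    }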
diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h
index 149c5e7efc89..092981171df1 100644
--- a/drivers/xen/xenbus/xenbus.h
+++ b/drivers/xen/xenbus/xenbus.h
@@ -76,6 +76,7 @@ struct xb_req_data {
76 struct list_head list; 76 struct list_head list;
77 wait_queue_head_t wq; 77 wait_queue_head_t wq;
78 struct xsd_sockmsg msg; 78 struct xsd_sockmsg msg;
79 uint32_t caller_req_id;
79 enum xsd_sockmsg_type type; 80 enum xsd_sockmsg_type type;
80 char *body; 81 char *body;
81 const struct kvec *vec; 82 const struct kvec *vec;
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index 5b081a01779d..d239fc3c5e3d 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -309,6 +309,7 @@ static int process_msg(void)
309 goto out; 309 goto out;
310 310
311 if (req->state == xb_req_state_wait_reply) { 311 if (req->state == xb_req_state_wait_reply) {
312 req->msg.req_id = req->caller_req_id;
312 req->msg.type = state.msg.type; 313 req->msg.type = state.msg.type;
313 req->msg.len = state.msg.len; 314 req->msg.len = state.msg.len;
314 req->body = state.body; 315 req->body = state.body;
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 3e59590c7254..3f3b29398ab8 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -227,6 +227,8 @@ static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg)
227 req->state = xb_req_state_queued; 227 req->state = xb_req_state_queued;
228 init_waitqueue_head(&req->wq); 228 init_waitqueue_head(&req->wq);
229 229
230 /* Save the caller req_id and restore it later in the reply */
231 req->caller_req_id = req->msg.req_id;
230 req->msg.req_id = xs_request_enter(req); 232 req->msg.req_id = xs_request_enter(req);
231 233
232 mutex_lock(&xb_write_mutex); 234 mutex_lock(&xb_write_mutex);
@@ -310,6 +312,7 @@ static void *xs_talkv(struct xenbus_transaction t,
310 req->num_vecs = num_vecs; 312 req->num_vecs = num_vecs;
311 req->cb = xs_wake_up; 313 req->cb = xs_wake_up;
312 314
315 msg.req_id = 0;
313 msg.tx_id = t.id; 316 msg.tx_id = t.id;
314 msg.type = type; 317 msg.type = type;
315 msg.len = 0; 318 msg.len = 0;
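xs_send() overwrites msg.req_id with the internal slot id returned by xs_request_enter(), so before this change a reply could come back to the caller carrying the store's bookkeeping id instead of the id the caller set. The new caller_req_id field preserves the original value across the round trip, and the xs_talkv() hunk zeroes msg.req_id up front so in-kernel callers always save a well-defined value. A condensed sketch of the flow, using only the fields shown above:

    /* Condensed sketch of the request/reply id handling. */
    static void send_side(struct xb_req_data *req)
    {
            req->caller_req_id = req->msg.req_id;    /* caller's id, saved */
            req->msg.req_id = xs_request_enter(req); /* internal id on the wire */
            /* ... queue and transmit req ... */
    }

    static void reply_side(struct xb_req_data *req)
    {
            /* the reply was matched by the internal id; restore the caller's */
            req->msg.req_id = req->caller_req_id;
    }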
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e4054e533f6d..f94b2d8c744a 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1264,7 +1264,16 @@ again:
1264 while (node) { 1264 while (node) {
1265 ref = rb_entry(node, struct prelim_ref, rbnode); 1265 ref = rb_entry(node, struct prelim_ref, rbnode);
1266 node = rb_next(&ref->rbnode); 1266 node = rb_next(&ref->rbnode);
1267 WARN_ON(ref->count < 0); 1267 /*
1268 * ref->count < 0 can happen here if there are delayed
1269 * refs with a node->action of BTRFS_DROP_DELAYED_REF.
1270 * prelim_ref_insert() relies on this when merging
1271 * identical refs to keep the overall count correct.
1272 * prelim_ref_insert() will merge only those refs
1273 * which compare identically. Any refs having
1274 * e.g. different offsets would not be merged,
1275 * and would retain their original ref->count < 0.
1276 */
1268 if (roots && ref->count && ref->root_id && ref->parent == 0) { 1277 if (roots && ref->count && ref->root_id && ref->parent == 0) {
1269 if (sc && sc->root_objectid && 1278 if (sc && sc->root_objectid &&
1270 ref->root_id != sc->root_objectid) { 1279 ref->root_id != sc->root_objectid) {
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index a1a40cf382e3..7ab5e0128f0c 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -821,7 +821,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
821 spin_unlock(&delayed_refs->lock); 821 spin_unlock(&delayed_refs->lock);
822 822
823 if (qrecord_inserted) 823 if (qrecord_inserted)
824 return btrfs_qgroup_trace_extent_post(fs_info, record); 824 btrfs_qgroup_trace_extent_post(fs_info, record);
825
825 return 0; 826 return 0;
826 827
827free_head_ref: 828free_head_ref:
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 05751a677da4..c1618ab9fecf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2147,6 +2147,10 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
2147 u64 bytes; 2147 u64 bytes;
2148 struct request_queue *req_q; 2148 struct request_queue *req_q;
2149 2149
2150 if (!stripe->dev->bdev) {
2151 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
2152 continue;
2153 }
2150 req_q = bdev_get_queue(stripe->dev->bdev); 2154 req_q = bdev_get_queue(stripe->dev->bdev);
2151 if (!blk_queue_discard(req_q)) 2155 if (!blk_queue_discard(req_q))
2152 continue; 2156 continue;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 53ca025655fc..a79299a89b7d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1335,8 +1335,11 @@ next_slot:
1335 leaf = path->nodes[0]; 1335 leaf = path->nodes[0];
1336 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 1336 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1337 ret = btrfs_next_leaf(root, path); 1337 ret = btrfs_next_leaf(root, path);
1338 if (ret < 0) 1338 if (ret < 0) {
1339 if (cow_start != (u64)-1)
1340 cur_offset = cow_start;
1339 goto error; 1341 goto error;
1342 }
1340 if (ret > 0) 1343 if (ret > 0)
1341 break; 1344 break;
1342 leaf = path->nodes[0]; 1345 leaf = path->nodes[0];
@@ -3385,6 +3388,11 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
3385 ret = btrfs_orphan_reserve_metadata(trans, inode); 3388 ret = btrfs_orphan_reserve_metadata(trans, inode);
3386 ASSERT(!ret); 3389 ASSERT(!ret);
3387 if (ret) { 3390 if (ret) {
3391 /*
3392 * dec doesn't need spin_lock as ->orphan_block_rsv
3393 * would be released only if ->orphan_inodes is
3394 * zero.
3395 */
3388 atomic_dec(&root->orphan_inodes); 3396 atomic_dec(&root->orphan_inodes);
3389 clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, 3397 clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
3390 &inode->runtime_flags); 3398 &inode->runtime_flags);
@@ -3399,12 +3407,17 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
3399 if (insert >= 1) { 3407 if (insert >= 1) {
3400 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); 3408 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
3401 if (ret) { 3409 if (ret) {
3402 atomic_dec(&root->orphan_inodes);
3403 if (reserve) { 3410 if (reserve) {
3404 clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, 3411 clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
3405 &inode->runtime_flags); 3412 &inode->runtime_flags);
3406 btrfs_orphan_release_metadata(inode); 3413 btrfs_orphan_release_metadata(inode);
3407 } 3414 }
3415 /*
3416 * btrfs_orphan_commit_root may race with us and set
3417 * ->orphan_block_rsv to zero, in order to avoid that,
3418 * decrease ->orphan_inodes after everything is done.
3419 */
3420 atomic_dec(&root->orphan_inodes);
3408 if (ret != -EEXIST) { 3421 if (ret != -EEXIST) {
3409 clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 3422 clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3410 &inode->runtime_flags); 3423 &inode->runtime_flags);
@@ -3436,28 +3449,26 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3436{ 3449{
3437 struct btrfs_root *root = inode->root; 3450 struct btrfs_root *root = inode->root;
3438 int delete_item = 0; 3451 int delete_item = 0;
3439 int release_rsv = 0;
3440 int ret = 0; 3452 int ret = 0;
3441 3453
3442 spin_lock(&root->orphan_lock);
3443 if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 3454 if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3444 &inode->runtime_flags)) 3455 &inode->runtime_flags))
3445 delete_item = 1; 3456 delete_item = 1;
3446 3457
3458 if (delete_item && trans)
3459 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
3460
3447 if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, 3461 if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
3448 &inode->runtime_flags)) 3462 &inode->runtime_flags))
3449 release_rsv = 1; 3463 btrfs_orphan_release_metadata(inode);
3450 spin_unlock(&root->orphan_lock);
3451 3464
3452 if (delete_item) { 3465 /*
3466 * btrfs_orphan_commit_root may race with us and set ->orphan_block_rsv
3467 * to zero, in order to avoid that, decrease ->orphan_inodes after
3468 * everything is done.
3469 */
3470 if (delete_item)
3453 atomic_dec(&root->orphan_inodes); 3471 atomic_dec(&root->orphan_inodes);
3454 if (trans)
3455 ret = btrfs_del_orphan_item(trans, root,
3456 btrfs_ino(inode));
3457 }
3458
3459 if (release_rsv)
3460 btrfs_orphan_release_metadata(inode);
3461 3472
3462 return ret; 3473 return ret;
3463} 3474}
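Both orphan hunks move atomic_dec(&root->orphan_inodes) to the very end of the path because btrfs_orphan_commit_root() may zero ->orphan_block_rsv once the counter drops; decrementing first would let btrfs_orphan_release_metadata() race against that. Reduced to the bare ordering (a sketch, not a drop-in body for btrfs_orphan_del()):

    /* Ordering sketch only. */
    if (delete_item && trans)
            ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));

    if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
                           &inode->runtime_flags))
            btrfs_orphan_release_metadata(inode);   /* still needs the rsv */

    if (delete_item)
            atomic_dec(&root->orphan_inodes);       /* last: may free the rsv */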
@@ -5281,7 +5292,7 @@ void btrfs_evict_inode(struct inode *inode)
5281 trace_btrfs_inode_evict(inode); 5292 trace_btrfs_inode_evict(inode);
5282 5293
5283 if (!root) { 5294 if (!root) {
5284 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 5295 clear_inode(inode);
5285 return; 5296 return;
5286 } 5297 }
5287 5298
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9e61dd624f7b..aa259d6986e1 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1442,8 +1442,13 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
1442 int ret; 1442 int ret;
1443 1443
1444 ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false); 1444 ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
1445 if (ret < 0) 1445 if (ret < 0) {
1446 return ret; 1446 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1447 btrfs_warn(fs_info,
1448"error accounting new delayed refs extent (err code: %d), quota inconsistent",
1449 ret);
1450 return 0;
1451 }
1447 1452
1448 /* 1453 /*
1449 * Here we don't need to get the lock of 1454 * Here we don't need to get the lock of
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index afadaadab18e..4fd19b4d6675 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -29,6 +29,7 @@
29#include "hash.h" 29#include "hash.h"
30#include "compression.h" 30#include "compression.h"
31#include "qgroup.h" 31#include "qgroup.h"
32#include "inode-map.h"
32 33
33/* magic values for the inode_only field in btrfs_log_inode: 34/* magic values for the inode_only field in btrfs_log_inode:
34 * 35 *
@@ -2472,6 +2473,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2472 clean_tree_block(fs_info, next); 2473 clean_tree_block(fs_info, next);
2473 btrfs_wait_tree_block_writeback(next); 2474 btrfs_wait_tree_block_writeback(next);
2474 btrfs_tree_unlock(next); 2475 btrfs_tree_unlock(next);
2476 } else {
2477 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
2478 clear_extent_buffer_dirty(next);
2475 } 2479 }
2476 2480
2477 WARN_ON(root_owner != 2481 WARN_ON(root_owner !=
@@ -2552,6 +2556,9 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
2552 clean_tree_block(fs_info, next); 2556 clean_tree_block(fs_info, next);
2553 btrfs_wait_tree_block_writeback(next); 2557 btrfs_wait_tree_block_writeback(next);
2554 btrfs_tree_unlock(next); 2558 btrfs_tree_unlock(next);
2559 } else {
2560 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
2561 clear_extent_buffer_dirty(next);
2555 } 2562 }
2556 2563
2557 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); 2564 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
@@ -2630,6 +2637,9 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
2630 clean_tree_block(fs_info, next); 2637 clean_tree_block(fs_info, next);
2631 btrfs_wait_tree_block_writeback(next); 2638 btrfs_wait_tree_block_writeback(next);
2632 btrfs_tree_unlock(next); 2639 btrfs_tree_unlock(next);
2640 } else {
2641 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
2642 clear_extent_buffer_dirty(next);
2633 } 2643 }
2634 2644
2635 WARN_ON(log->root_key.objectid != 2645 WARN_ON(log->root_key.objectid !=
@@ -3018,13 +3028,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
3018 3028
3019 while (1) { 3029 while (1) {
3020 ret = find_first_extent_bit(&log->dirty_log_pages, 3030 ret = find_first_extent_bit(&log->dirty_log_pages,
3021 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW, 3031 0, &start, &end,
3032 EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT,
3022 NULL); 3033 NULL);
3023 if (ret) 3034 if (ret)
3024 break; 3035 break;
3025 3036
3026 clear_extent_bits(&log->dirty_log_pages, start, end, 3037 clear_extent_bits(&log->dirty_log_pages, start, end,
3027 EXTENT_DIRTY | EXTENT_NEW); 3038 EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
3028 } 3039 }
3029 3040
3030 /* 3041 /*
@@ -5677,6 +5688,23 @@ again:
5677 path); 5688 path);
5678 } 5689 }
5679 5690
5691 if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
5692 struct btrfs_root *root = wc.replay_dest;
5693
5694 btrfs_release_path(path);
5695
5696 /*
5697 * We have just replayed everything, and the highest
5698 * objectid of fs roots probably has changed in case
5699 * some inode_item's got replayed.
5700 *
5701 * root->objectid_mutex is not acquired as log replay
5702 * could only happen during mount.
5703 */
5704 ret = btrfs_find_highest_objectid(root,
5705 &root->highest_objectid);
5706 }
5707
5680 key.offset = found_key.offset - 1; 5708 key.offset = found_key.offset - 1;
5681 wc.replay_dest->log_root = NULL; 5709 wc.replay_dest->log_root = NULL;
5682 free_extent_buffer(log->node); 5710 free_extent_buffer(log->node);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b5036bd69e6a..2ceb924ca0d6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -645,6 +645,7 @@ static void btrfs_free_stale_devices(const char *path,
645 btrfs_sysfs_remove_fsid(fs_devs); 645 btrfs_sysfs_remove_fsid(fs_devs);
646 list_del(&fs_devs->list); 646 list_del(&fs_devs->list);
647 free_fs_devices(fs_devs); 647 free_fs_devices(fs_devs);
648 break;
648 } else { 649 } else {
649 fs_devs->num_devices--; 650 fs_devs->num_devices--;
650 list_del(&dev->dev_list); 651 list_del(&dev->dev_list);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index cff79ea0c01d..5243989a60cc 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -482,7 +482,6 @@ static void lowcomms_error_report(struct sock *sk)
482{ 482{
483 struct connection *con; 483 struct connection *con;
484 struct sockaddr_storage saddr; 484 struct sockaddr_storage saddr;
485 int buflen;
486 void (*orig_report)(struct sock *) = NULL; 485 void (*orig_report)(struct sock *) = NULL;
487 486
488 read_lock_bh(&sk->sk_callback_lock); 487 read_lock_bh(&sk->sk_callback_lock);
@@ -492,7 +491,7 @@ static void lowcomms_error_report(struct sock *sk)
492 491
493 orig_report = listen_sock.sk_error_report; 492 orig_report = listen_sock.sk_error_report;
494 if (con->sock == NULL || 493 if (con->sock == NULL ||
495 kernel_getpeername(con->sock, (struct sockaddr *)&saddr, &buflen)) { 494 kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) {
496 printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 495 printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
497 "sending to node %d, port %d, " 496 "sending to node %d, port %d, "
498 "sk_err=%d/%d\n", dlm_our_nodeid(), 497 "sk_err=%d/%d\n", dlm_our_nodeid(),
@@ -757,8 +756,8 @@ static int tcp_accept_from_sock(struct connection *con)
757 756
758 /* Get the connected socket's peer */ 757 /* Get the connected socket's peer */
759 memset(&peeraddr, 0, sizeof(peeraddr)); 758 memset(&peeraddr, 0, sizeof(peeraddr));
760 if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 759 len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2);
761 &len, 2)) { 760 if (len < 0) {
762 result = -ECONNABORTED; 761 result = -ECONNABORTED;
763 goto accept_err; 762 goto accept_err;
764 } 763 }
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 86863792f36a..86d6a4435c87 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -716,7 +716,7 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
716 __be64 *ptr; 716 __be64 *ptr;
717 sector_t lblock; 717 sector_t lblock;
718 sector_t lend; 718 sector_t lend;
719 int ret; 719 int ret = 0;
720 int eob; 720 int eob;
721 unsigned int len; 721 unsigned int len;
722 struct buffer_head *bh; 722 struct buffer_head *bh;
@@ -728,12 +728,14 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
728 goto out; 728 goto out;
729 } 729 }
730 730
731 if ((flags & IOMAP_REPORT) && gfs2_is_stuffed(ip)) { 731 if (gfs2_is_stuffed(ip)) {
732 gfs2_stuffed_iomap(inode, iomap); 732 if (flags & IOMAP_REPORT) {
733 if (pos >= iomap->length) 733 gfs2_stuffed_iomap(inode, iomap);
734 return -ENOENT; 734 if (pos >= iomap->length)
735 ret = 0; 735 ret = -ENOENT;
736 goto out; 736 goto out;
737 }
738 BUG_ON(!(flags & IOMAP_WRITE));
737 } 739 }
738 740
739 lblock = pos >> inode->i_blkbits; 741 lblock = pos >> inode->i_blkbits;
@@ -744,7 +746,7 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
744 iomap->type = IOMAP_HOLE; 746 iomap->type = IOMAP_HOLE;
745 iomap->length = (u64)(lend - lblock) << inode->i_blkbits; 747 iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
746 iomap->flags = IOMAP_F_MERGED; 748 iomap->flags = IOMAP_F_MERGED;
747 bmap_lock(ip, 0); 749 bmap_lock(ip, flags & IOMAP_WRITE);
748 750
749 /* 751 /*
750 * Directory data blocks have a struct gfs2_meta_header header, so the 752 * Directory data blocks have a struct gfs2_meta_header header, so the
@@ -787,27 +789,28 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
787 iomap->flags |= IOMAP_F_BOUNDARY; 789 iomap->flags |= IOMAP_F_BOUNDARY;
788 iomap->length = (u64)len << inode->i_blkbits; 790 iomap->length = (u64)len << inode->i_blkbits;
789 791
790 ret = 0;
791
792out_release: 792out_release:
793 release_metapath(&mp); 793 release_metapath(&mp);
794 bmap_unlock(ip, 0); 794 bmap_unlock(ip, flags & IOMAP_WRITE);
795out: 795out:
796 trace_gfs2_iomap_end(ip, iomap, ret); 796 trace_gfs2_iomap_end(ip, iomap, ret);
797 return ret; 797 return ret;
798 798
799do_alloc: 799do_alloc:
800 if (!(flags & IOMAP_WRITE)) { 800 if (flags & IOMAP_WRITE) {
801 if (pos >= i_size_read(inode)) { 801 ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
802 } else if (flags & IOMAP_REPORT) {
803 loff_t size = i_size_read(inode);
804 if (pos >= size)
802 ret = -ENOENT; 805 ret = -ENOENT;
803 goto out_release; 806 else if (height <= ip->i_height)
804 } 807 iomap->length = hole_size(inode, lblock, &mp);
805 ret = 0; 808 else
806 iomap->length = hole_size(inode, lblock, &mp); 809 iomap->length = size - pos;
807 goto out_release; 810 } else {
811 if (height <= ip->i_height)
812 iomap->length = hole_size(inode, lblock, &mp);
808 } 813 }
809
810 ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
811 goto out_release; 814 goto out_release;
812} 815}
813 816
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 36b0772701a0..60702d677bd4 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -184,6 +184,7 @@ int open_related_ns(struct ns_common *ns,
184 184
185 return fd; 185 return fd;
186} 186}
187EXPORT_SYMBOL_GPL(open_related_ns);
187 188
188static long ns_ioctl(struct file *filp, unsigned int ioctl, 189static long ns_ioctl(struct file *filp, unsigned int ioctl,
189 unsigned long arg) 190 unsigned long arg)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index eac5140aac47..e5076185cc1e 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1819,7 +1819,7 @@ int o2net_register_hb_callbacks(void)
1819 1819
1820static int o2net_accept_one(struct socket *sock, int *more) 1820static int o2net_accept_one(struct socket *sock, int *more)
1821{ 1821{
1822 int ret, slen; 1822 int ret;
1823 struct sockaddr_in sin; 1823 struct sockaddr_in sin;
1824 struct socket *new_sock = NULL; 1824 struct socket *new_sock = NULL;
1825 struct o2nm_node *node = NULL; 1825 struct o2nm_node *node = NULL;
@@ -1864,9 +1864,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
1864 goto out; 1864 goto out;
1865 } 1865 }
1866 1866
1867 slen = sizeof(sin); 1867 ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1);
1868 ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
1869 &slen, 1);
1870 if (ret < 0) 1868 if (ret < 0)
1871 goto out; 1869 goto out;
1872 1870
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e8a93bc8285d..d1e82761de81 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -510,6 +510,10 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
510 /* we have to zero-fill user buffer even if no read */ 510 /* we have to zero-fill user buffer even if no read */
511 if (copy_to_user(buffer, buf, tsz)) 511 if (copy_to_user(buffer, buf, tsz))
512 return -EFAULT; 512 return -EFAULT;
513 } else if (m->type == KCORE_USER) {
514 /* User page is handled prior to normal kernel page: */
515 if (copy_to_user(buffer, (char *)start, tsz))
516 return -EFAULT;
513 } else { 517 } else {
514 if (kern_addr_valid(start)) { 518 if (kern_addr_valid(start)) {
515 /* 519 /*
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 68c06ae7888c..da6f8733c9c5 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -237,6 +237,7 @@ static __net_exit void proc_net_ns_exit(struct net *net)
237static struct pernet_operations __net_initdata proc_net_ns_ops = { 237static struct pernet_operations __net_initdata proc_net_ns_ops = {
238 .init = proc_net_ns_init, 238 .init = proc_net_ns_init,
239 .exit = proc_net_ns_exit, 239 .exit = proc_net_ns_exit,
240 .async = true,
240}; 241};
241 242
242int __init proc_net_init(void) 243int __init proc_net_init(void)
diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h
index bc397573c43a..67ab280ad134 100644
--- a/include/asm-generic/bitops/lock.h
+++ b/include/asm-generic/bitops/lock.h
@@ -7,7 +7,8 @@
7 * @nr: Bit to set 7 * @nr: Bit to set
8 * @addr: Address to count from 8 * @addr: Address to count from
9 * 9 *
10 * This operation is atomic and provides acquire barrier semantics. 10 * This operation is atomic and provides acquire barrier semantics if
11 * the returned value is 0.
11 * It can be used to implement bit locks. 12 * It can be used to implement bit locks.
12 */ 13 */
13#define test_and_set_bit_lock(nr, addr) test_and_set_bit(nr, addr) 14#define test_and_set_bit_lock(nr, addr) test_and_set_bit(nr, addr)
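The comment fix narrows the documented guarantee: on this generic fallback, test_and_set_bit_lock() provides acquire semantics only when it returns 0, that is, only when the lock was actually taken. A minimal bit lock built on the pair (hypothetical names, sketch only):

    #include <linux/bitops.h>
    #include <asm/processor.h>

    static unsigned long my_lock_word;
    #define MY_LOCK_BIT     0

    static void my_bit_lock(void)
    {
            /* acquire ordering applies on the successful (0) return */
            while (test_and_set_bit_lock(MY_LOCK_BIT, &my_lock_word))
                    cpu_relax();
    }

    static void my_bit_unlock(void)
    {
            clear_bit_unlock(MY_LOCK_BIT, &my_lock_word);   /* release */
    }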
diff --git a/include/dt-bindings/net/ti-dp83867.h b/include/dt-bindings/net/ti-dp83867.h
index 172744a72eb7..7b1656427cbe 100644
--- a/include/dt-bindings/net/ti-dp83867.h
+++ b/include/dt-bindings/net/ti-dp83867.h
@@ -42,4 +42,18 @@
42#define DP83867_RGMIIDCTL_3_75_NS 0xe 42#define DP83867_RGMIIDCTL_3_75_NS 0xe
43#define DP83867_RGMIIDCTL_4_00_NS 0xf 43#define DP83867_RGMIIDCTL_4_00_NS 0xf
44 44
45/* IO_MUX_CFG - Clock output selection */
46#define DP83867_CLK_O_SEL_CHN_A_RCLK 0x0
47#define DP83867_CLK_O_SEL_CHN_B_RCLK 0x1
48#define DP83867_CLK_O_SEL_CHN_C_RCLK 0x2
49#define DP83867_CLK_O_SEL_CHN_D_RCLK 0x3
50#define DP83867_CLK_O_SEL_CHN_A_RCLK_DIV5 0x4
51#define DP83867_CLK_O_SEL_CHN_B_RCLK_DIV5 0x5
52#define DP83867_CLK_O_SEL_CHN_C_RCLK_DIV5 0x6
53#define DP83867_CLK_O_SEL_CHN_D_RCLK_DIV5 0x7
54#define DP83867_CLK_O_SEL_CHN_A_TCLK 0x8
55#define DP83867_CLK_O_SEL_CHN_B_TCLK 0x9
56#define DP83867_CLK_O_SEL_CHN_C_TCLK 0xA
57#define DP83867_CLK_O_SEL_CHN_D_TCLK 0xB
58#define DP83867_CLK_O_SEL_REF_CLK 0xC
45#endif 59#endif
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 64e10746f282..968173ec2726 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -587,7 +587,7 @@ extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *),
587const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, 587const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids,
588 const struct device *dev); 588 const struct device *dev);
589 589
590void *acpi_get_match_data(const struct device *dev); 590const void *acpi_device_get_match_data(const struct device *dev);
591extern bool acpi_driver_match_device(struct device *dev, 591extern bool acpi_driver_match_device(struct device *dev,
592 const struct device_driver *drv); 592 const struct device_driver *drv);
593int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *); 593int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *);
@@ -766,7 +766,7 @@ static inline const struct acpi_device_id *acpi_match_device(
766 return NULL; 766 return NULL;
767} 767}
768 768
769static inline void *acpi_get_match_data(const struct device *dev) 769static inline const void *acpi_device_get_match_data(const struct device *dev)
770{ 770{
771 return NULL; 771 return NULL;
772} 772}
diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 4d356e168692..40373920ea58 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -113,10 +113,12 @@ extern void aarp_proto_init(void);
113/* Inter module exports */ 113/* Inter module exports */
114 114
115/* Give a device find its atif control structure */ 115/* Give a device find its atif control structure */
116#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
116static inline struct atalk_iface *atalk_find_dev(struct net_device *dev) 117static inline struct atalk_iface *atalk_find_dev(struct net_device *dev)
117{ 118{
118 return dev->atalk_ptr; 119 return dev->atalk_ptr;
119} 120}
121#endif
120 122
121extern struct atalk_addr *atalk_find_dev_addr(struct net_device *dev); 123extern struct atalk_addr *atalk_find_dev_addr(struct net_device *dev);
122extern struct net_device *atrtr_get_dev(struct atalk_addr *sa); 124extern struct net_device *atrtr_get_dev(struct atalk_addr *sa);
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 3ce61342fa31..b0a7f315bfbe 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -136,15 +136,21 @@ enum virtchnl_ops {
136 VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27, 136 VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27,
137 VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28, 137 VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28,
138 VIRTCHNL_OP_REQUEST_QUEUES = 29, 138 VIRTCHNL_OP_REQUEST_QUEUES = 29,
139 VIRTCHNL_OP_ENABLE_CHANNELS = 30,
140 VIRTCHNL_OP_DISABLE_CHANNELS = 31,
141 VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
142 VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
139}; 143};
140 144
141/* This macro is used to generate a compilation error if a structure 145/* These macros are used to generate compilation errors if a structure/union
142 * is not exactly the correct length. It gives a divide by zero error if the 146 * is not exactly the correct length. It gives a divide by zero error if the
143 * structure is not of the correct size, otherwise it creates an enum that is 147 * structure/union is not of the correct size, otherwise it creates an enum
144 * never used. 148 * that is never used.
145 */ 149 */
146#define VIRTCHNL_CHECK_STRUCT_LEN(n, X) enum virtchnl_static_assert_enum_##X \ 150#define VIRTCHNL_CHECK_STRUCT_LEN(n, X) enum virtchnl_static_assert_enum_##X \
147 { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) } 151 { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
152#define VIRTCHNL_CHECK_UNION_LEN(n, X) enum virtchnl_static_asset_enum_##X \
153 { virtchnl_static_assert_##X = (n)/((sizeof(union X) == (n)) ? 1 : 0) }
148 154
149/* Virtual channel message descriptor. This overlays the admin queue 155/* Virtual channel message descriptor. This overlays the admin queue
150 * descriptor. All other data is passed in external buffers. 156 * descriptor. All other data is passed in external buffers.
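Both length-check macros turn a size mismatch into a divide by zero inside a constant expression, so a wrong structure or union layout fails to compile right where the check is written. For illustration (hypothetical type, not part of virtchnl):

    #include <linux/types.h>

    struct demo {
            u32 a;
            u32 b;
    };

    VIRTCHNL_CHECK_STRUCT_LEN(8, demo);     /* ok: sizeof(struct demo) == 8 */
    /* VIRTCHNL_CHECK_STRUCT_LEN(12, demo); would not compile: (12)/0 */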
@@ -244,6 +250,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
244#define VIRTCHNL_VF_OFFLOAD_ENCAP 0X00100000 250#define VIRTCHNL_VF_OFFLOAD_ENCAP 0X00100000
245#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 251#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000
246#define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000 252#define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000
253#define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000
247 254
248#define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ 255#define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
249 VIRTCHNL_VF_OFFLOAD_VLAN | \ 256 VIRTCHNL_VF_OFFLOAD_VLAN | \
@@ -496,6 +503,81 @@ struct virtchnl_rss_hena {
496 503
497VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena); 504VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena);
498 505
506/* VIRTCHNL_OP_ENABLE_CHANNELS
507 * VIRTCHNL_OP_DISABLE_CHANNELS
508 * VF sends these messages to enable or disable channels based on
509 * the user specified queue count and queue offset for each traffic class.
510 * This struct encompasses all the information that the PF needs from
511 * VF to create a channel.
512 */
513struct virtchnl_channel_info {
514 u16 count; /* number of queues in a channel */
515 u16 offset; /* queues in a channel start from 'offset' */
516 u32 pad;
517 u64 max_tx_rate;
518};
519
520VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_channel_info);
521
522struct virtchnl_tc_info {
523 u32 num_tc;
524 u32 pad;
525 struct virtchnl_channel_info list[1];
526};
527
528VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_tc_info);
529
530/* VIRTCHNL_ADD_CLOUD_FILTER
531 * VIRTCHNL_DEL_CLOUD_FILTER
532 * VF sends these messages to add or delete a cloud filter based on the
533 * user specified match and action filters. These structures encompass
534 * all the information that the PF needs from the VF to add/delete a
535 * cloud filter.
536 */
537
538struct virtchnl_l4_spec {
539 u8 src_mac[ETH_ALEN];
540 u8 dst_mac[ETH_ALEN];
541 __be16 vlan_id;
542 __be16 pad; /* reserved for future use */
543 __be32 src_ip[4];
544 __be32 dst_ip[4];
545 __be16 src_port;
546 __be16 dst_port;
547};
548
549VIRTCHNL_CHECK_STRUCT_LEN(52, virtchnl_l4_spec);
550
551union virtchnl_flow_spec {
552 struct virtchnl_l4_spec tcp_spec;
553 u8 buffer[128]; /* reserved for future use */
554};
555
556VIRTCHNL_CHECK_UNION_LEN(128, virtchnl_flow_spec);
557
558enum virtchnl_action {
559 /* action types */
560 VIRTCHNL_ACTION_DROP = 0,
561 VIRTCHNL_ACTION_TC_REDIRECT,
562};
563
564enum virtchnl_flow_type {
565 /* flow types */
566 VIRTCHNL_TCP_V4_FLOW = 0,
567 VIRTCHNL_TCP_V6_FLOW,
568};
569
570struct virtchnl_filter {
571 union virtchnl_flow_spec data;
572 union virtchnl_flow_spec mask;
573 enum virtchnl_flow_type flow_type;
574 enum virtchnl_action action;
575 u32 action_meta;
576 __u8 field_flags;
577};
578
579VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
580
499/* VIRTCHNL_OP_EVENT 581/* VIRTCHNL_OP_EVENT
500 * PF sends this message to inform the VF driver of events that may affect it. 582 * PF sends this message to inform the VF driver of events that may affect it.
501 * No direct response is expected from the VF, though it may generate other 583 * No direct response is expected from the VF, though it may generate other
@@ -711,6 +793,25 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
711 case VIRTCHNL_OP_REQUEST_QUEUES: 793 case VIRTCHNL_OP_REQUEST_QUEUES:
712 valid_len = sizeof(struct virtchnl_vf_res_request); 794 valid_len = sizeof(struct virtchnl_vf_res_request);
713 break; 795 break;
796 case VIRTCHNL_OP_ENABLE_CHANNELS:
797 valid_len = sizeof(struct virtchnl_tc_info);
798 if (msglen >= valid_len) {
799 struct virtchnl_tc_info *vti =
800 (struct virtchnl_tc_info *)msg;
801 valid_len += vti->num_tc *
802 sizeof(struct virtchnl_channel_info);
803 if (vti->num_tc == 0)
804 err_msg_format = true;
805 }
806 break;
807 case VIRTCHNL_OP_DISABLE_CHANNELS:
808 break;
809 case VIRTCHNL_OP_ADD_CLOUD_FILTER:
810 valid_len = sizeof(struct virtchnl_filter);
811 break;
812 case VIRTCHNL_OP_DEL_CLOUD_FILTER:
813 valid_len = sizeof(struct virtchnl_filter);
814 break;
714 /* These are always errors coming from the VF. */ 815 /* These are always errors coming from the VF. */
715 case VIRTCHNL_OP_EVENT: 816 case VIRTCHNL_OP_EVENT:
716 case VIRTCHNL_OP_UNKNOWN: 817 case VIRTCHNL_OP_UNKNOWN:
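VIRTCHNL_OP_ENABLE_CHANNELS is the one variable-length message added here, so its expected length is computed from num_tc at validation time. A VF-side sizing sketch that satisfies the check above; note the validator adds num_tc full entries on top of sizeof(struct virtchnl_tc_info), which already embeds list[1], so sizing this way always passes:

    #include <linux/slab.h>

    /* Sketch: sizing an ENABLE_CHANNELS message to pass validation. */
    static int send_enable_channels(u32 num_tc)
    {
            size_t len = sizeof(struct virtchnl_tc_info) +
                         num_tc * sizeof(struct virtchnl_channel_info);
            struct virtchnl_tc_info *vti = kzalloc(len, GFP_KERNEL);

            if (!vti)
                    return -ENOMEM;
            vti->num_tc = num_tc;   /* zero is rejected as err_msg_format */
            /* fill vti->list[0..num_tc-1], send len bytes, then: */
            kfree(vti);
            return 0;
    }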
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4f3df807cf8f..ed63f3b69c12 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -49,7 +49,7 @@ struct blk_stat_callback;
49#define BLKDEV_MIN_RQ 4 49#define BLKDEV_MIN_RQ 4
50#define BLKDEV_MAX_RQ 128 /* Default maximum */ 50#define BLKDEV_MAX_RQ 128 /* Default maximum */
51 51
52/* Must be consisitent with blk_mq_poll_stats_bkt() */ 52/* Must be consistent with blk_mq_poll_stats_bkt() */
53#define BLK_MQ_POLL_STATS_BKTS 16 53#define BLK_MQ_POLL_STATS_BKTS 16
54 54
55/* 55/*
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 631354acfa72..73bc63e0a1c4 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -167,8 +167,6 @@
167 167
168#if GCC_VERSION >= 40100 168#if GCC_VERSION >= 40100
169# define __compiletime_object_size(obj) __builtin_object_size(obj, 0) 169# define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
170
171#define __nostackprotector __attribute__((__optimize__("no-stack-protector")))
172#endif 170#endif
173 171
174#if GCC_VERSION >= 40300 172#if GCC_VERSION >= 40300
@@ -196,6 +194,11 @@
196#endif /* __CHECKER__ */ 194#endif /* __CHECKER__ */
197#endif /* GCC_VERSION >= 40300 */ 195#endif /* GCC_VERSION >= 40300 */
198 196
197#if GCC_VERSION >= 40400
198#define __optimize(level) __attribute__((__optimize__(level)))
199#define __nostackprotector __optimize("no-stack-protector")
200#endif /* GCC_VERSION >= 40400 */
201
199#if GCC_VERSION >= 40500 202#if GCC_VERSION >= 40500
200 203
201#ifndef __CHECKER__ 204#ifndef __CHECKER__
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c2cc57a2f508..e835fc0423ec 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -277,6 +277,10 @@ unsigned long read_word_at_a_time(const void *addr)
277 277
278#endif /* __ASSEMBLY__ */ 278#endif /* __ASSEMBLY__ */
279 279
280#ifndef __optimize
281# define __optimize(level)
282#endif
283
280/* Compile time object size, -1 for unknown */ 284/* Compile time object size, -1 for unknown */
281#ifndef __compiletime_object_size 285#ifndef __compiletime_object_size
282# define __compiletime_object_size(obj) -1 286# define __compiletime_object_size(obj) -1
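With the empty fallback in place, individual functions can be annotated unconditionally: on GCC >= 4.4 the attribute is honored, and elsewhere it expands to nothing. A hypothetical use (function name is illustrative only):

    /* Hypothetical: pin one rarely-run function at -Os where supported. */
    static int __optimize("Os") cold_init_tables(void)
    {
            return 0;
    }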
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 871f9e21810c..0b3fc229086c 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -225,7 +225,7 @@ static inline void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev,
225} 225}
226#endif 226#endif
227 227
228#ifdef CONFIG_ARCH_HAS_CPU_RELAX 228#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_ARCH_HAS_CPU_RELAX)
229void cpuidle_poll_state_init(struct cpuidle_driver *drv); 229void cpuidle_poll_state_init(struct cpuidle_driver *drv);
230#else 230#else
231static inline void cpuidle_poll_state_init(struct cpuidle_driver *drv) {} 231static inline void cpuidle_poll_state_init(struct cpuidle_driver *drv) {}
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d4a2a7dcd72d..bf53d893ad02 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -170,6 +170,8 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
170 for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) 170 for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
171#define for_each_cpu_not(cpu, mask) \ 171#define for_each_cpu_not(cpu, mask) \
172 for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) 172 for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
173#define for_each_cpu_wrap(cpu, mask, start) \
174 for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
173#define for_each_cpu_and(cpu, mask, and) \ 175#define for_each_cpu_and(cpu, mask, and) \
174 for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and) 176 for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
175#else 177#else
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 34fe8463d10e..eb9eab4ecd6d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -578,7 +578,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
578 578
579/* 579/*
580 * This is a hack for the legacy x86 forbid_dac and iommu_sac_force. Please 580 * This is a hack for the legacy x86 forbid_dac and iommu_sac_force. Please
581 * don't use this is new code. 581 * don't use this in new code.
582 */ 582 */
583#ifndef arch_dma_supported 583#ifndef arch_dma_supported
584#define arch_dma_supported(dev, mask) (1) 584#define arch_dma_supported(dev, mask) (1)
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 4fa1a489efe4..4fe8f289b3f6 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -73,8 +73,8 @@ struct fwnode_operations {
73 struct fwnode_handle *(*get)(struct fwnode_handle *fwnode); 73 struct fwnode_handle *(*get)(struct fwnode_handle *fwnode);
74 void (*put)(struct fwnode_handle *fwnode); 74 void (*put)(struct fwnode_handle *fwnode);
75 bool (*device_is_available)(const struct fwnode_handle *fwnode); 75 bool (*device_is_available)(const struct fwnode_handle *fwnode);
76 void *(*device_get_match_data)(const struct fwnode_handle *fwnode, 76 const void *(*device_get_match_data)(const struct fwnode_handle *fwnode,
77 const struct device *dev); 77 const struct device *dev);
78 bool (*property_present)(const struct fwnode_handle *fwnode, 78 bool (*property_present)(const struct fwnode_handle *fwnode,
79 const char *propname); 79 const char *propname);
80 int (*property_read_int_array)(const struct fwnode_handle *fwnode, 80 int (*property_read_int_array)(const struct fwnode_handle *fwnode,
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 7ff25a808fef..80db19d3a505 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -10,6 +10,7 @@ enum kcore_type {
10 KCORE_VMALLOC, 10 KCORE_VMALLOC,
11 KCORE_RAM, 11 KCORE_RAM,
12 KCORE_VMEMMAP, 12 KCORE_VMEMMAP,
13 KCORE_USER,
13 KCORE_OTHER, 14 KCORE_OTHER,
14}; 15};
15 16
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index c30b32e3c862..10191c28fc04 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -127,10 +127,4 @@ static __always_inline enum lru_list page_lru(struct page *page)
127 127
128#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) 128#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
129 129
130#ifdef arch_unmap_kpfn
131extern void arch_unmap_kpfn(unsigned long pfn);
132#else
133static __always_inline void arch_unmap_kpfn(unsigned long pfn) { }
134#endif
135
136#endif 130#endif
diff --git a/include/linux/net.h b/include/linux/net.h
index 91216b16feb7..000d1aada74f 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -146,7 +146,7 @@ struct proto_ops {
146 struct socket *newsock, int flags, bool kern); 146 struct socket *newsock, int flags, bool kern);
147 int (*getname) (struct socket *sock, 147 int (*getname) (struct socket *sock,
148 struct sockaddr *addr, 148 struct sockaddr *addr,
149 int *sockaddr_len, int peer); 149 int peer);
150 __poll_t (*poll) (struct file *file, struct socket *sock, 150 __poll_t (*poll) (struct file *file, struct socket *sock,
151 struct poll_table_struct *wait); 151 struct poll_table_struct *wait);
152 int (*ioctl) (struct socket *sock, unsigned int cmd, 152 int (*ioctl) (struct socket *sock, unsigned int cmd,
@@ -294,10 +294,8 @@ int kernel_listen(struct socket *sock, int backlog);
294int kernel_accept(struct socket *sock, struct socket **newsock, int flags); 294int kernel_accept(struct socket *sock, struct socket **newsock, int flags);
295int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, 295int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
296 int flags); 296 int flags);
297int kernel_getsockname(struct socket *sock, struct sockaddr *addr, 297int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
298 int *addrlen); 298int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
299int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
300 int *addrlen);
301int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, 299int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval,
302 int *optlen); 300 int *optlen);
303int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, 301int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
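The getname-style calls now return the filled-in address length (or a negative errno) instead of writing it through an int pointer; the dlm and ocfs2 hunks earlier in this diff are call-site conversions to match. A sketch of the new calling convention:

    static int peer_len(struct socket *sock)
    {
            struct sockaddr_storage addr;
            int len;

            /* old form was:
             *     err = kernel_getpeername(sock, (struct sockaddr *)&addr, &len);
             */
            len = kernel_getpeername(sock, (struct sockaddr *)&addr);
            if (len < 0)
                    return len;     /* negative errno */
            return len;             /* bytes of address filled in */
    }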
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5eef6c8e2741..dbe6344b727a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1798,11 +1798,17 @@ struct net_device {
1798#if IS_ENABLED(CONFIG_TIPC) 1798#if IS_ENABLED(CONFIG_TIPC)
1799 struct tipc_bearer __rcu *tipc_ptr; 1799 struct tipc_bearer __rcu *tipc_ptr;
1800#endif 1800#endif
1801#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
1801 void *atalk_ptr; 1802 void *atalk_ptr;
1803#endif
1802 struct in_device __rcu *ip_ptr; 1804 struct in_device __rcu *ip_ptr;
1805#if IS_ENABLED(CONFIG_DECNET)
1803 struct dn_dev __rcu *dn_ptr; 1806 struct dn_dev __rcu *dn_ptr;
1807#endif
1804 struct inet6_dev __rcu *ip6_ptr; 1808 struct inet6_dev __rcu *ip6_ptr;
1809#if IS_ENABLED(CONFIG_AX25)
1805 void *ax25_ptr; 1810 void *ax25_ptr;
1811#endif
1806 struct wireless_dev *ieee80211_ptr; 1812 struct wireless_dev *ieee80211_ptr;
1807 struct wpan_dev *ieee802154_ptr; 1813 struct wpan_dev *ieee802154_ptr;
1808#if IS_ENABLED(CONFIG_MPLS_ROUTING) 1814#if IS_ENABLED(CONFIG_MPLS_ROUTING)
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
index b99bced39ac2..fbc98e2c8228 100644
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -20,20 +20,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
20 unsigned long size) 20 unsigned long size)
21{ 21{
22 /* 22 /*
23 * Warn developers about inappropriate array_index_nospec() usage.
24 *
25 * Even if the CPU speculates past the WARN_ONCE branch, the
26 * sign bit of @index is taken into account when generating the
27 * mask.
28 *
29 * This warning is compiled out when the compiler can infer that
30 * @index and @size are less than LONG_MAX.
31 */
32 if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,
33 "array_index_nospec() limited to range of [0, LONG_MAX]\n"))
34 return 0;
35
36 /*
37 * Always calculate and emit the mask even if the compiler 23 * Always calculate and emit the mask even if the compiler
38 * thinks the mask is not needed. The compiler does not take 24 * thinks the mask is not needed. The compiler does not take
39 * into account the value of @index under speculation. 25 * into account the value of @index under speculation.
@@ -44,6 +30,26 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
44#endif 30#endif
45 31
46/* 32/*
33 * Warn developers about inappropriate array_index_nospec() usage.
34 *
35 * Even if the CPU speculates past the WARN_ONCE branch, the
36 * sign bit of @index is taken into account when generating the
37 * mask.
38 *
39 * This warning is compiled out when the compiler can infer that
40 * @index and @size are less than LONG_MAX.
41 */
42#define array_index_mask_nospec_check(index, size) \
43({ \
44 if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \
45 "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \
46 _mask = 0; \
47 else \
48 _mask = array_index_mask_nospec(index, size); \
49 _mask; \
50})
51
52/*
47 * array_index_nospec - sanitize an array index after a bounds check 53 * array_index_nospec - sanitize an array index after a bounds check
48 * 54 *
49 * For a code sequence like: 55 * For a code sequence like:
@@ -61,7 +67,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
61({ \ 67({ \
62 typeof(index) _i = (index); \ 68 typeof(index) _i = (index); \
63 typeof(size) _s = (size); \ 69 typeof(size) _s = (size); \
64 unsigned long _mask = array_index_mask_nospec(_i, _s); \ 70 unsigned long _mask = array_index_mask_nospec_check(_i, _s); \
65 \ 71 \
66 BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ 72 BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \
67 BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ 73 BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \
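The WARN_ONCE now lives in array_index_mask_nospec_check(), so the range warning also covers architectures that override array_index_mask_nospec() with their own implementation, and a triggered warning forces a zero mask. Typical usage is unchanged: after a conventional bounds check, the index is clamped under speculation before the dereference (hypothetical table):

    #include <linux/kernel.h>
    #include <linux/nospec.h>
    #include <linux/errno.h>

    static int table[16];

    static int load_entry(unsigned long idx)
    {
            if (idx >= ARRAY_SIZE(table))
                    return -EINVAL;
            /* idx is forced toward 0 on speculative out-of-bounds paths */
            idx = array_index_nospec(idx, ARRAY_SIZE(table));
            return table[idx];
    }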
diff --git a/include/linux/property.h b/include/linux/property.h
index 769d372c1edf..2eea4b310fc2 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -283,7 +283,7 @@ bool device_dma_supported(struct device *dev);
283 283
284enum dev_dma_attr device_get_dma_attr(struct device *dev); 284enum dev_dma_attr device_get_dma_attr(struct device *dev);
285 285
286void *device_get_match_data(struct device *dev); 286const void *device_get_match_data(struct device *dev);
287 287
288int device_get_phy_mode(struct device *dev); 288int device_get_phy_mode(struct device *dev);
289 289
diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index a079656b614c..059242030631 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -75,5 +75,9 @@ void __init ptp_classifier_init(void);
75static inline void ptp_classifier_init(void) 75static inline void ptp_classifier_init(void)
76{ 76{
77} 77}
78static inline unsigned int ptp_classify_raw(struct sk_buff *skb)
79{
80 return PTP_CLASS_NONE;
81}
78#endif 82#endif
79#endif /* _PTP_CLASSIFY_H_ */ 83#endif /* _PTP_CLASSIFY_H_ */
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index b884b7794187..6894976b54e3 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -296,13 +296,14 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r)
296{ 296{
297 void *ptr; 297 void *ptr;
298 298
299 /* The READ_ONCE in __ptr_ring_peek guarantees that anyone
300 * accessing data through the pointer is up to date. Pairs
301 * with smp_wmb in __ptr_ring_produce.
302 */
299 ptr = __ptr_ring_peek(r); 303 ptr = __ptr_ring_peek(r);
300 if (ptr) 304 if (ptr)
301 __ptr_ring_discard_one(r); 305 __ptr_ring_discard_one(r);
302 306
303 /* Make sure anyone accessing data through the pointer is up to date. */
304 /* Pairs with smp_wmb in __ptr_ring_produce. */
305 smp_read_barrier_depends();
306 return ptr; 307 return ptr;
307} 308}
308 309
@@ -469,7 +470,7 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
469 */ 470 */
470static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) 471static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
471{ 472{
472 if (size * sizeof(void *) > KMALLOC_MAX_SIZE) 473 if (size > KMALLOC_MAX_SIZE / sizeof(void *))
473 return NULL; 474 return NULL;
474 return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO); 475 return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
475} 476}
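The rewritten bound check divides the constant instead of multiplying the caller-supplied size, so the comparison can never wrap. On 64-bit the old form was safe because size is promoted to a 64-bit size_t, but on 32-bit it could overflow. A mirror of the new check with the failure case spelled out:

    #include <linux/slab.h>

    static inline bool ring_size_ok(unsigned int size)
    {
            /* e.g. on 32-bit: size = 0x40000001, sizeof(void *) == 4;
             * size * sizeof(void *) wraps to 4 and would wrongly pass
             * the old "size * sizeof(void *) > KMALLOC_MAX_SIZE" test.
             */
            return size <= KMALLOC_MAX_SIZE / sizeof(void *);
    }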
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 1fdcde96eb65..3573b4bf2fdf 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -35,7 +35,7 @@ extern int rtnl_trylock(void);
35extern int rtnl_is_locked(void); 35extern int rtnl_is_locked(void);
36 36
37extern wait_queue_head_t netdev_unregistering_wq; 37extern wait_queue_head_t netdev_unregistering_wq;
38extern struct mutex net_mutex; 38extern struct rw_semaphore net_sem;
39 39
40#ifdef CONFIG_PROVE_LOCKING 40#ifdef CONFIG_PROVE_LOCKING
41extern bool lockdep_rtnl_is_held(void); 41extern bool lockdep_rtnl_is_held(void);
diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
index dc368b8ce215..11c86fbfeb98 100644
--- a/include/linux/semaphore.h
+++ b/include/linux/semaphore.h
@@ -4,7 +4,7 @@
4 * 4 *
5 * Distributed under the terms of the GNU GPL, version 2 5 * Distributed under the terms of the GNU GPL, version 2
6 * 6 *
7 * Please see kernel/semaphore.c for documentation of these functions 7 * Please see kernel/locking/semaphore.c for documentation of these functions
8 */ 8 */
9#ifndef __LINUX_SEMAPHORE_H 9#ifndef __LINUX_SEMAPHORE_H
10#define __LINUX_SEMAPHORE_H 10#define __LINUX_SEMAPHORE_H
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5ebc0f869720..9bc1750ca3d3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -466,6 +466,9 @@ struct ubuf_info {
466 466
467#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) 467#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
468 468
469int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
470void mm_unaccount_pinned_pages(struct mmpin *mmp);
471
469struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size); 472struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
470struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, 473struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
471 struct ubuf_info *uarg); 474 struct ubuf_info *uarg);
@@ -3646,7 +3649,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
3646 return true; 3649 return true;
3647} 3650}
3648 3651
3649/* For small packets <= CHECKSUM_BREAK peform checksum complete directly 3652/* For small packets <= CHECKSUM_BREAK perform checksum complete directly
3650 * in checksum_init. 3653 * in checksum_init.
3651 */ 3654 */
3652#define CHECKSUM_BREAK 76 3655#define CHECKSUM_BREAK 76
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 9286a5a8c60c..1ce1f768a58c 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -353,4 +353,6 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen
353 unsigned int flags, struct timespec *timeout); 353 unsigned int flags, struct timespec *timeout);
354extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, 354extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
355 unsigned int vlen, unsigned int flags); 355 unsigned int vlen, unsigned int flags);
356
357extern struct ns_common *get_net_ns(struct ns_common *ns);
356#endif /* _LINUX_SOCKET_H */ 358#endif /* _LINUX_SOCKET_H */
diff --git a/include/net/Space.h b/include/net/Space.h
index 27fb5c937c4f..336da258885a 100644
--- a/include/net/Space.h
+++ b/include/net/Space.h
@@ -20,7 +20,6 @@ struct net_device *cs89x0_probe(int unit);
20struct net_device *mvme147lance_probe(int unit); 20struct net_device *mvme147lance_probe(int unit);
21struct net_device *tc515_probe(int unit); 21struct net_device *tc515_probe(int unit);
22struct net_device *lance_probe(int unit); 22struct net_device *lance_probe(int unit);
23struct net_device *mac8390_probe(int unit);
24struct net_device *mac89x0_probe(int unit); 23struct net_device *mac89x0_probe(int unit);
25struct net_device *cops_probe(int unit); 24struct net_device *cops_probe(int unit);
26struct net_device *ltpc_probe(void); 25struct net_device *ltpc_probe(void);
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 6ed9692f20bd..9c2f22695025 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -87,12 +87,15 @@ struct tc_action_ops {
87 struct tcf_result *); 87 struct tcf_result *);
88 int (*dump)(struct sk_buff *, struct tc_action *, int, int); 88 int (*dump)(struct sk_buff *, struct tc_action *, int, int);
89 void (*cleanup)(struct tc_action *); 89 void (*cleanup)(struct tc_action *);
90 int (*lookup)(struct net *, struct tc_action **, u32); 90 int (*lookup)(struct net *net, struct tc_action **a, u32 index,
91 struct netlink_ext_ack *extack);
91 int (*init)(struct net *net, struct nlattr *nla, 92 int (*init)(struct net *net, struct nlattr *nla,
92 struct nlattr *est, struct tc_action **act, int ovr, 93 struct nlattr *est, struct tc_action **act, int ovr,
93 int bind); 94 int bind, struct netlink_ext_ack *extack);
94 int (*walk)(struct net *, struct sk_buff *, 95 int (*walk)(struct net *, struct sk_buff *,
95 struct netlink_callback *, int, const struct tc_action_ops *); 96 struct netlink_callback *, int,
97 const struct tc_action_ops *,
98 struct netlink_ext_ack *);
96 void (*stats_update)(struct tc_action *, u64, u32, u64); 99 void (*stats_update)(struct tc_action *, u64, u32, u64);
97 struct net_device *(*get_dev)(const struct tc_action *a); 100 struct net_device *(*get_dev)(const struct tc_action *a);
98}; 101};
@@ -137,7 +140,8 @@ static inline void tc_action_net_exit(struct list_head *net_list,
137 140
138int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, 141int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
139 struct netlink_callback *cb, int type, 142 struct netlink_callback *cb, int type,
140 const struct tc_action_ops *ops); 143 const struct tc_action_ops *ops,
144 struct netlink_ext_ack *extack);
141int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index); 145int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index);
142bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, 146bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
143 int bind); 147 int bind);
@@ -162,10 +166,11 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
162 int nr_actions, struct tcf_result *res); 166 int nr_actions, struct tcf_result *res);
163int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, 167int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
164 struct nlattr *est, char *name, int ovr, int bind, 168 struct nlattr *est, char *name, int ovr, int bind,
165 struct list_head *actions); 169 struct list_head *actions, struct netlink_ext_ack *extack);
166struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, 170struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
167 struct nlattr *nla, struct nlattr *est, 171 struct nlattr *nla, struct nlattr *est,
168 char *name, int ovr, int bind); 172 char *name, int ovr, int bind,
173 struct netlink_ext_ack *extack);
169int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int); 174int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
170int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); 175int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
171int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); 176int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 76fb39c272a7..c91bc87931c7 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -318,10 +318,12 @@ void ax25_digi_invert(const ax25_digi *, ax25_digi *);
318extern ax25_dev *ax25_dev_list; 318extern ax25_dev *ax25_dev_list;
319extern spinlock_t ax25_dev_lock; 319extern spinlock_t ax25_dev_lock;
320 320
321#if IS_ENABLED(CONFIG_AX25)
321static inline ax25_dev *ax25_dev_ax25dev(struct net_device *dev) 322static inline ax25_dev *ax25_dev_ax25dev(struct net_device *dev)
322{ 323{
323 return dev->ax25_ptr; 324 return dev->ax25_ptr;
324} 325}
326#endif
325 327
326ax25_dev *ax25_addr_ax25dev(ax25_address *); 328ax25_dev *ax25_addr_ax25dev(ax25_address *);
327void ax25_dev_device_up(struct net_device *); 329void ax25_dev_device_up(struct net_device *);
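The new IS_ENABLED(CONFIG_AX25) guard is needed because net_device only provides the ax25_ptr member when AX.25 support is built in or modular; compiling the accessor unconditionally would break AX.25-less configurations. The general shape of the pattern, sketched with invented names:

#if IS_ENABLED(CONFIG_FOO)	/* true for both =y and =m */
static inline struct foo_priv *foo_priv(struct net_device *dev)
{
	return dev->foo_ptr;	/* hypothetical conditional member */
}
#endif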
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 6545b03e97f7..8d1c3f276dea 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -234,13 +234,9 @@ struct devlink_dpipe_headers {
234/** 234/**
235 * struct devlink_resource_ops - resource ops 235 * struct devlink_resource_ops - resource ops
236 * @occ_get: get the occupied size 236 * @occ_get: get the occupied size
237 * @size_validate: validate the size of the resource before update, reload
238 * is needed for changes to take place
239 */ 237 */
240struct devlink_resource_ops { 238struct devlink_resource_ops {
241 u64 (*occ_get)(struct devlink *devlink); 239 u64 (*occ_get)(struct devlink *devlink);
242 int (*size_validate)(struct devlink *devlink, u64 size,
243 struct netlink_ext_ack *extack);
244}; 240};
245 241
246/** 242/**
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6cb602dd970c..0ad17b63684d 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -19,6 +19,7 @@
19#include <linux/workqueue.h> 19#include <linux/workqueue.h>
20#include <linux/of.h> 20#include <linux/of.h>
21#include <linux/ethtool.h> 21#include <linux/ethtool.h>
22#include <linux/net_tstamp.h>
22#include <net/devlink.h> 23#include <net/devlink.h>
23#include <net/switchdev.h> 24#include <net/switchdev.h>
24 25
@@ -101,6 +102,7 @@ struct dsa_platform_data {
101}; 102};
102 103
103struct packet_type; 104struct packet_type;
105struct dsa_switch;
104 106
105struct dsa_device_ops { 107struct dsa_device_ops {
106 struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); 108 struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
@@ -368,6 +370,12 @@ struct dsa_switch_ops {
368 struct ethtool_wolinfo *w); 370 struct ethtool_wolinfo *w);
369 371
370 /* 372 /*
373 * ethtool timestamp info
374 */
375 int (*get_ts_info)(struct dsa_switch *ds, int port,
376 struct ethtool_ts_info *ts);
377
378 /*
371 * Suspend and resume 379 * Suspend and resume
372 */ 380 */
373 int (*suspend)(struct dsa_switch *ds); 381 int (*suspend)(struct dsa_switch *ds);
@@ -469,6 +477,18 @@ struct dsa_switch_ops {
469 int port, struct net_device *br); 477 int port, struct net_device *br);
470 void (*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index, 478 void (*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index,
471 int port, struct net_device *br); 479 int port, struct net_device *br);
480
481 /*
482 * PTP functionality
483 */
484 int (*port_hwtstamp_get)(struct dsa_switch *ds, int port,
485 struct ifreq *ifr);
486 int (*port_hwtstamp_set)(struct dsa_switch *ds, int port,
487 struct ifreq *ifr);
488 bool (*port_txtstamp)(struct dsa_switch *ds, int port,
489 struct sk_buff *clone, unsigned int type);
490 bool (*port_rxtstamp)(struct dsa_switch *ds, int port,
491 struct sk_buff *skb, unsigned int type);
472}; 492};
473 493
474struct dsa_switch_driver { 494struct dsa_switch_driver {
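A hypothetical driver-side sketch of the new timestamping hooks (the foo_ names are invented). By convention, port_txtstamp/port_rxtstamp return true when the driver takes ownership of the skb until the hardware timestamp is available:

static int foo_port_hwtstamp_set(struct dsa_switch *ds, int port,
				 struct ifreq *ifr)
{
	struct hwtstamp_config config;

	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
		return -EFAULT;
	/* ... program the switch's PTP block for this port ... */
	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
		-EFAULT : 0;
}

static bool foo_port_rxtstamp(struct dsa_switch *ds, int port,
			      struct sk_buff *skb, unsigned int type)
{
	return false;	/* deliver immediately; no deferred timestamp */
}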
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 648caf90ec07..b166ef07e6d4 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -26,7 +26,8 @@ struct fib_rule {
26 u32 table; 26 u32 table;
27 u8 action; 27 u8 action;
28 u8 l3mdev; 28 u8 l3mdev;
29 /* 2 bytes hole, try to use */ 29 u8 proto;
30 /* 1 byte hole, try to use */
30 u32 target; 31 u32 target;
31 __be64 tun_id; 32 __be64 tun_id;
32 struct fib_rule __rcu *ctarget; 33 struct fib_rule __rcu *ctarget;
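The hole bookkeeping in the comment is plain alignment arithmetic (assuming natural 4-byte alignment for u32): two trailing u8s left two pad bytes before the following u32, so the new u8 consumes one of them:

/* relative offsets within this run of members:
 *   +0  u32 table
 *   +4  u8  action
 *   +5  u8  l3mdev
 *   +6  u8  proto     <- lands in the old padding
 *   +7  (1 byte hole)
 *   +8  u32 target    (must start 4-byte aligned)
 */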
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 5a54c9570977..500f81375200 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -32,7 +32,7 @@ int inet_shutdown(struct socket *sock, int how);
32int inet_listen(struct socket *sock, int backlog); 32int inet_listen(struct socket *sock, int backlog);
33void inet_sock_destruct(struct sock *sk); 33void inet_sock_destruct(struct sock *sk);
34int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); 34int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
35int inet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, 35int inet_getname(struct socket *sock, struct sockaddr *uaddr,
36 int peer); 36 int peer);
37int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 37int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
38int inet_ctl_sock_create(struct sock **sk, unsigned short family, 38int inet_ctl_sock_create(struct sock **sk, unsigned short family,
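This is one instance of a tree-wide conversion (the call sites in net/appletalk, net/atm, net/bluetooth and net/can further down follow the same shape): getname handlers now return the populated address length on success, or a negative errno, instead of writing the length through an int pointer. A condensed sketch with an invented AF_FOO family:

static int foo_getname(struct socket *sock, struct sockaddr *uaddr,
		       int peer)
{
	struct sockaddr_foo *addr = (struct sockaddr_foo *)uaddr;

	memset(addr, 0, sizeof(*addr));
	addr->foo_family = AF_FOO;
	/* ... fill in local or peer address depending on @peer ... */
	return sizeof(*addr);	/* length, not 0, signals success */
}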
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 8606c9113d3f..7a98cd583c73 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1056,7 +1056,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
1056 1056
1057int inet6_release(struct socket *sock); 1057int inet6_release(struct socket *sock);
1058int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); 1058int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
1059int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, 1059int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
1060 int peer); 1060 int peer);
1061int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 1061int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
1062 1062
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index d747ef975cd8..33fd9ba7e0e5 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -127,6 +127,17 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
127int lwtunnel_input(struct sk_buff *skb); 127int lwtunnel_input(struct sk_buff *skb);
128int lwtunnel_xmit(struct sk_buff *skb); 128int lwtunnel_xmit(struct sk_buff *skb);
129 129
130static inline void lwtunnel_set_redirect(struct dst_entry *dst)
131{
132 if (lwtunnel_output_redirect(dst->lwtstate)) {
133 dst->lwtstate->orig_output = dst->output;
134 dst->output = lwtunnel_output;
135 }
136 if (lwtunnel_input_redirect(dst->lwtstate)) {
137 dst->lwtstate->orig_input = dst->input;
138 dst->input = lwtunnel_input;
139 }
140}
130#else 141#else
131 142
132static inline void lwtstate_free(struct lwtunnel_state *lws) 143static inline void lwtstate_free(struct lwtunnel_state *lws)
@@ -158,6 +169,10 @@ static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
158 return false; 169 return false;
159} 170}
160 171
172static inline void lwtunnel_set_redirect(struct dst_entry *dst)
173{
174}
175
161static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, 176static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
162 unsigned int mtu) 177 unsigned int mtu)
163{ 178{
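With a real helper under CONFIG_LWTUNNEL and an empty stub otherwise, callers can collapse the open-coded output/input redirect checks into one unconditional call. An illustrative call site (foo_ name invented):

static void foo_finalize_dst(struct dst_entry *dst)
{
	/* hooks dst->output/dst->input only if the lwtstate asks for it */
	lwtunnel_set_redirect(dst);
}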
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index f306b2aa15a4..d4417495773a 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -59,8 +59,12 @@ struct net {
59 atomic64_t cookie_gen; 59 atomic64_t cookie_gen;
60 60
61 struct list_head list; /* list of network namespaces */ 61 struct list_head list; /* list of network namespaces */
62 struct list_head cleanup_list; /* namespaces on death row */ 62 struct list_head exit_list; /* Used to call pernet exit
63 struct list_head exit_list; /* Use only net_mutex */ 63 * methods on a dead net (net_sem
64 * read locked), or to unregister
65 * pernet ops (net_sem write locked).
66 */
67 struct llist_node cleanup_list; /* namespaces on death row */
64 68
65 struct user_namespace *user_ns; /* Owning user namespace */ 69 struct user_namespace *user_ns; /* Owning user namespace */
66 struct ucounts *ucounts; 70 struct ucounts *ucounts;
@@ -89,7 +93,7 @@ struct net {
89 /* core fib_rules */ 93 /* core fib_rules */
90 struct list_head rules_ops; 94 struct list_head rules_ops;
91 95
92 struct list_head fib_notifier_ops; /* protected by net_mutex */ 96 struct list_head fib_notifier_ops; /* protected by net_sem */
93 97
94 struct net_device *loopback_dev; /* The loopback */ 98 struct net_device *loopback_dev; /* The loopback */
95 struct netns_core core; 99 struct netns_core core;
@@ -313,6 +317,12 @@ struct pernet_operations {
313 void (*exit_batch)(struct list_head *net_exit_list); 317 void (*exit_batch)(struct list_head *net_exit_list);
314 unsigned int *id; 318 unsigned int *id;
315 size_t size; 319 size_t size;
320 /*
321 * Indicates that the above methods may be executed in parallel
322 * with the methods of any other pernet_operations, i.e. they do
323 * not need net_sem to be write locked.
324 */
325 bool async;
316}; 326};
317 327
318/* 328/*
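Subsystems opt in per pernet_operations instance; the audit and kobject_uevent hunks later in this diff do exactly that. A minimal sketch, assuming foo_net_init/foo_net_exit exist and touch only this subsystem's own state:

static struct pernet_operations foo_net_ops = {
	.init  = foo_net_init,
	.exit  = foo_net_exit,
	.async = true,	/* may run in parallel with other pernet ops */
};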
diff --git a/include/net/route.h b/include/net/route.h
index 1eb9ce470e25..158833ea7988 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -65,8 +65,6 @@ struct rtable {
65 /* Miscellaneous cached information */ 65 /* Miscellaneous cached information */
66 u32 rt_pmtu; 66 u32 rt_pmtu;
67 67
68 u32 rt_table_id;
69
70 struct list_head rt_uncached; 68 struct list_head rt_uncached;
71 struct uncached_list *rt_uncached_list; 69 struct uncached_list *rt_uncached_list;
72}; 70};
diff --git a/include/net/sock.h b/include/net/sock.h
index 169c92afcafa..b9624581d639 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -417,6 +417,7 @@ struct sock {
417 struct page_frag sk_frag; 417 struct page_frag sk_frag;
418 netdev_features_t sk_route_caps; 418 netdev_features_t sk_route_caps;
419 netdev_features_t sk_route_nocaps; 419 netdev_features_t sk_route_nocaps;
420 netdev_features_t sk_route_forced_caps;
420 int sk_gso_type; 421 int sk_gso_type;
421 unsigned int sk_gso_max_size; 422 unsigned int sk_gso_max_size;
422 gfp_t sk_allocation; 423 gfp_t sk_allocation;
@@ -1584,7 +1585,7 @@ int sock_no_bind(struct socket *, struct sockaddr *, int);
1584int sock_no_connect(struct socket *, struct sockaddr *, int, int); 1585int sock_no_connect(struct socket *, struct sockaddr *, int, int);
1585int sock_no_socketpair(struct socket *, struct socket *); 1586int sock_no_socketpair(struct socket *, struct socket *);
1586int sock_no_accept(struct socket *, struct socket *, int, bool); 1587int sock_no_accept(struct socket *, struct socket *, int, bool);
1587int sock_no_getname(struct socket *, struct sockaddr *, int *, int); 1588int sock_no_getname(struct socket *, struct sockaddr *, int);
1588__poll_t sock_no_poll(struct file *, struct socket *, 1589__poll_t sock_no_poll(struct file *, struct socket *,
1589 struct poll_table_struct *); 1590 struct poll_table_struct *);
1590int sock_no_ioctl(struct socket *, unsigned int, unsigned long); 1591int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
@@ -1862,15 +1863,6 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
1862 sk->sk_route_caps &= ~flags; 1863 sk->sk_route_caps &= ~flags;
1863} 1864}
1864 1865
1865static inline bool sk_check_csum_caps(struct sock *sk)
1866{
1867 return (sk->sk_route_caps & NETIF_F_HW_CSUM) ||
1868 (sk->sk_family == PF_INET &&
1869 (sk->sk_route_caps & NETIF_F_IP_CSUM)) ||
1870 (sk->sk_family == PF_INET6 &&
1871 (sk->sk_route_caps & NETIF_F_IPV6_CSUM));
1872}
1873
1874static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, 1866static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
1875 struct iov_iter *from, char *to, 1867 struct iov_iter *from, char *to,
1876 int copy, int offset) 1868 int copy, int offset)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e3fc667f9ac2..92b06c6e7732 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -374,7 +374,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
374 struct sk_buff *skb, 374 struct sk_buff *skb,
375 const struct tcphdr *th); 375 const struct tcphdr *th);
376struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, 376struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
377 struct request_sock *req, bool fastopen); 377 struct request_sock *req, bool fastopen,
378 bool *lost_race);
378int tcp_child_process(struct sock *parent, struct sock *child, 379int tcp_child_process(struct sock *parent, struct sock *child,
379 struct sk_buff *skb); 380 struct sk_buff *skb);
380void tcp_enter_loss(struct sock *sk); 381void tcp_enter_loss(struct sock *sk);
diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
index 50e78a74d0df..2875e169d744 100644
--- a/include/net/tcp_states.h
+++ b/include/net/tcp_states.h
@@ -32,21 +32,21 @@ enum {
32 32
33#define TCP_STATE_MASK 0xF 33#define TCP_STATE_MASK 0xF
34 34
35#define TCP_ACTION_FIN (1 << 7) 35#define TCP_ACTION_FIN (1 << TCP_CLOSE)
36 36
37enum { 37enum {
38 TCPF_ESTABLISHED = (1 << 1), 38 TCPF_ESTABLISHED = (1 << TCP_ESTABLISHED),
39 TCPF_SYN_SENT = (1 << 2), 39 TCPF_SYN_SENT = (1 << TCP_SYN_SENT),
40 TCPF_SYN_RECV = (1 << 3), 40 TCPF_SYN_RECV = (1 << TCP_SYN_RECV),
41 TCPF_FIN_WAIT1 = (1 << 4), 41 TCPF_FIN_WAIT1 = (1 << TCP_FIN_WAIT1),
42 TCPF_FIN_WAIT2 = (1 << 5), 42 TCPF_FIN_WAIT2 = (1 << TCP_FIN_WAIT2),
43 TCPF_TIME_WAIT = (1 << 6), 43 TCPF_TIME_WAIT = (1 << TCP_TIME_WAIT),
44 TCPF_CLOSE = (1 << 7), 44 TCPF_CLOSE = (1 << TCP_CLOSE),
45 TCPF_CLOSE_WAIT = (1 << 8), 45 TCPF_CLOSE_WAIT = (1 << TCP_CLOSE_WAIT),
46 TCPF_LAST_ACK = (1 << 9), 46 TCPF_LAST_ACK = (1 << TCP_LAST_ACK),
47 TCPF_LISTEN = (1 << 10), 47 TCPF_LISTEN = (1 << TCP_LISTEN),
48 TCPF_CLOSING = (1 << 11), 48 TCPF_CLOSING = (1 << TCP_CLOSING),
49 TCPF_NEW_SYN_RECV = (1 << 12), 49 TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
50}; 50};
51 51
52#endif /* _LINUX_TCP_STATES_H */ 52#endif /* _LINUX_TCP_STATES_H */
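Deriving each TCPF_* flag from its TCP_* state keeps the two enums in lockstep if states are ever renumbered, and it is what makes mask-membership tests work. An illustrative helper (not part of the patch):

static inline bool sk_state_in(const struct sock *sk, int mask)
{
	return (1 << sk->sk_state) & mask;
}

/* e.g. sk_state_in(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) */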
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 81bdbf97319b..9185e45b997f 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -64,6 +64,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
64 UDP_SKB_CB(skb)->cscov = cscov; 64 UDP_SKB_CB(skb)->cscov = cscov;
65 if (skb->ip_summed == CHECKSUM_COMPLETE) 65 if (skb->ip_summed == CHECKSUM_COMPLETE)
66 skb->ip_summed = CHECKSUM_NONE; 66 skb->ip_summed = CHECKSUM_NONE;
67 skb->csum_valid = 0;
67 } 68 }
68 69
69 return 0; 70 return 0;
diff --git a/include/sound/ac97/regs.h b/include/sound/ac97/regs.h
index 4bb86d379bd5..9a4fa0c3264a 100644
--- a/include/sound/ac97/regs.h
+++ b/include/sound/ac97/regs.h
@@ -31,7 +31,7 @@
31#define AC97_HEADPHONE 0x04 /* Headphone Volume (optional) */ 31#define AC97_HEADPHONE 0x04 /* Headphone Volume (optional) */
32#define AC97_MASTER_MONO 0x06 /* Master Volume Mono (optional) */ 32#define AC97_MASTER_MONO 0x06 /* Master Volume Mono (optional) */
33#define AC97_MASTER_TONE 0x08 /* Master Tone (Bass & Treble) (optional) */ 33#define AC97_MASTER_TONE 0x08 /* Master Tone (Bass & Treble) (optional) */
34#define AC97_PC_BEEP 0x0a /* PC Beep Volume (optinal) */ 34#define AC97_PC_BEEP 0x0a /* PC Beep Volume (optional) */
35#define AC97_PHONE 0x0c /* Phone Volume (optional) */ 35#define AC97_PHONE 0x0c /* Phone Volume (optional) */
36#define AC97_MIC 0x0e /* MIC Volume */ 36#define AC97_MIC 0x0e /* MIC Volume */
37#define AC97_LINE 0x10 /* Line In Volume */ 37#define AC97_LINE 0x10 /* Line In Volume */
diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h
index b8adf05c534e..7dd8f34c37df 100644
--- a/include/trace/events/xen.h
+++ b/include/trace/events/xen.h
@@ -368,7 +368,7 @@ TRACE_EVENT(xen_mmu_flush_tlb,
368 TP_printk("%s", "") 368 TP_printk("%s", "")
369 ); 369 );
370 370
371TRACE_EVENT(xen_mmu_flush_tlb_single, 371TRACE_EVENT(xen_mmu_flush_tlb_one_user,
372 TP_PROTO(unsigned long addr), 372 TP_PROTO(unsigned long addr),
373 TP_ARGS(addr), 373 TP_ARGS(addr),
374 TP_STRUCT__entry( 374 TP_STRUCT__entry(
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index dc64cfaf13da..28812eda4209 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -20,11 +20,13 @@ struct sock_extended_err {
20#define SO_EE_ORIGIN_ICMP6 3 20#define SO_EE_ORIGIN_ICMP6 3
21#define SO_EE_ORIGIN_TXSTATUS 4 21#define SO_EE_ORIGIN_TXSTATUS 4
22#define SO_EE_ORIGIN_ZEROCOPY 5 22#define SO_EE_ORIGIN_ZEROCOPY 5
23#define SO_EE_ORIGIN_ZCOOKIE 6
23#define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS 24#define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
24 25
25#define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) 26#define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1))
26 27
27#define SO_EE_CODE_ZEROCOPY_COPIED 1 28#define SO_EE_CODE_ZEROCOPY_COPIED 1
29#define SO_EE_ORIGIN_MAX_ZCOOKIES 8
28 30
29/** 31/**
30 * struct scm_timestamping - timestamps exposed through cmsg 32 * struct scm_timestamping - timestamps exposed through cmsg
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 2b642bf9b5a0..925539172d5b 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -23,8 +23,8 @@ struct fib_rule_hdr {
23 __u8 tos; 23 __u8 tos;
24 24
25 __u8 table; 25 __u8 table;
26 __u8 proto;
26 __u8 res1; /* reserved */ 27 __u8 res1; /* reserved */
27 __u8 res2; /* reserved */
28 __u8 action; 28 __u8 action;
29 29
30 __u32 flags; 30 __u32 flags;
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index f8cb5760ea4f..8bbbcb5cd94b 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -23,7 +23,6 @@
23#define _UAPI_LINUX_IF_ETHER_H 23#define _UAPI_LINUX_IF_ETHER_H
24 24
25#include <linux/types.h> 25#include <linux/types.h>
26#include <linux/libc-compat.h>
27 26
28/* 27/*
29 * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble 28 * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble
@@ -151,6 +150,11 @@
151 * This is an Ethernet frame header. 150 * This is an Ethernet frame header.
152 */ 151 */
153 152
153/* allow libcs like musl to deactivate this, glibc does not implement this. */
154#ifndef __UAPI_DEF_ETHHDR
155#define __UAPI_DEF_ETHHDR 1
156#endif
157
154#if __UAPI_DEF_ETHHDR 158#if __UAPI_DEF_ETHHDR
155struct ethhdr { 159struct ethhdr {
156 unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ 160 unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
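With the default moved into if_ether.h itself, a libc that ships its own struct ethhdr (musl does; glibc does not) can suppress the kernel definition without pulling in libc-compat.h. A userspace sketch:

#define __UAPI_DEF_ETHHDR 0	/* use the libc's struct ethhdr */
#include <netinet/if_ether.h>	/* libc definition (e.g. musl) */
#include <linux/if_ether.h>	/* still provides ETH_ALEN, ETH_P_*, ... */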
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 6d9447700e18..11d0c0ea2bfa 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -941,4 +941,22 @@ enum {
941 IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */ 941 IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */
942}; 942};
943 943
944/* tun section */
945
946enum {
947 IFLA_TUN_UNSPEC,
948 IFLA_TUN_OWNER,
949 IFLA_TUN_GROUP,
950 IFLA_TUN_TYPE,
951 IFLA_TUN_PI,
952 IFLA_TUN_VNET_HDR,
953 IFLA_TUN_PERSIST,
954 IFLA_TUN_MULTI_QUEUE,
955 IFLA_TUN_NUM_QUEUES,
956 IFLA_TUN_NUM_DISABLED_QUEUES,
957 __IFLA_TUN_MAX,
958};
959
960#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
961
944#endif /* _UAPI_LINUX_IF_LINK_H */ 962#endif /* _UAPI_LINUX_IF_LINK_H */
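The new attributes let the tun driver expose its configuration via rtnetlink. A hypothetical .fill_info sketch, with values hard-coded for brevity:

static int foo_tun_fill_info(struct sk_buff *skb,
			     const struct net_device *dev)
{
	if (nla_put_u8(skb, IFLA_TUN_TYPE, IFF_TUN) ||
	    nla_put_u8(skb, IFLA_TUN_PERSIST, 1) ||
	    nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, 1))
		return -EMSGSIZE;
	return 0;
}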
diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index fc29efaa918c..8254c937c9f4 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -264,10 +264,4 @@
264 264
265#endif /* __GLIBC__ */ 265#endif /* __GLIBC__ */
266 266
267/* Definitions for if_ether.h */
268/* allow libcs like musl to deactivate this, glibc does not implement this. */
269#ifndef __UAPI_DEF_ETHHDR
270#define __UAPI_DEF_ETHHDR 1
271#endif
272
273#endif /* _UAPI_LIBC_COMPAT_H */ 267#endif /* _UAPI_LIBC_COMPAT_H */
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 46c506615f4a..7cafb26df555 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -555,7 +555,8 @@ enum {
555#define TCF_EM_VLAN 6 555#define TCF_EM_VLAN 6
556#define TCF_EM_CANID 7 556#define TCF_EM_CANID 7
557#define TCF_EM_IPSET 8 557#define TCF_EM_IPSET 8
558#define TCF_EM_MAX 8 558#define TCF_EM_IPT 9
559#define TCF_EM_MAX 9
559 560
560enum { 561enum {
561 TCF_EM_PROG_TC 562 TCF_EM_PROG_TC
diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index e71d4491f225..12e3bca32cad 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -103,6 +103,7 @@
103#define RDS_CMSG_MASKED_ATOMIC_FADD 8 103#define RDS_CMSG_MASKED_ATOMIC_FADD 8
104#define RDS_CMSG_MASKED_ATOMIC_CSWP 9 104#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
105#define RDS_CMSG_RXPATH_LATENCY 11 105#define RDS_CMSG_RXPATH_LATENCY 11
106#define RDS_CMSG_ZCOPY_COOKIE 12
106 107
107#define RDS_INFO_FIRST 10000 108#define RDS_INFO_FIRST 10000
108#define RDS_INFO_COUNTERS 10000 109#define RDS_INFO_COUNTERS 10000
diff --git a/include/uapi/linux/tc_ematch/tc_em_ipt.h b/include/uapi/linux/tc_ematch/tc_em_ipt.h
new file mode 100644
index 000000000000..49a65530992c
--- /dev/null
+++ b/include/uapi/linux/tc_ematch/tc_em_ipt.h
@@ -0,0 +1,20 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2#ifndef __LINUX_TC_EM_IPT_H
3#define __LINUX_TC_EM_IPT_H
4
5#include <linux/types.h>
6#include <linux/pkt_cls.h>
7
8enum {
9 TCA_EM_IPT_UNSPEC,
10 TCA_EM_IPT_HOOK,
11 TCA_EM_IPT_MATCH_NAME,
12 TCA_EM_IPT_MATCH_REVISION,
13 TCA_EM_IPT_NFPROTO,
14 TCA_EM_IPT_MATCH_DATA,
15 __TCA_EM_IPT_MAX
16};
17
18#define TCA_EM_IPT_MAX (__TCA_EM_IPT_MAX - 1)
19
20#endif
diff --git a/kernel/audit.c b/kernel/audit.c
index 227db99b0f19..5e49b614d0e6 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1526,6 +1526,7 @@ static struct pernet_operations audit_net_ops __net_initdata = {
1526 .exit = audit_net_exit, 1526 .exit = audit_net_exit,
1527 .id = &audit_net_id, 1527 .id = &audit_net_id,
1528 .size = sizeof(struct audit_net), 1528 .size = sizeof(struct audit_net),
1529 .async = true,
1529}; 1530};
1530 1531
1531/* Initialize audit support at boot time. */ 1532/* Initialize audit support at boot time. */
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index e6a9c36470ee..82b8b18ee1eb 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1726,25 +1726,14 @@ static int irq_domain_debug_show(struct seq_file *m, void *p)
1726 irq_domain_debug_show_one(m, d, 0); 1726 irq_domain_debug_show_one(m, d, 0);
1727 return 0; 1727 return 0;
1728} 1728}
1729 1729DEFINE_SHOW_ATTRIBUTE(irq_domain_debug);
1730static int irq_domain_debug_open(struct inode *inode, struct file *file)
1731{
1732 return single_open(file, irq_domain_debug_show, inode->i_private);
1733}
1734
1735static const struct file_operations dfs_domain_ops = {
1736 .open = irq_domain_debug_open,
1737 .read = seq_read,
1738 .llseek = seq_lseek,
1739 .release = single_release,
1740};
1741 1730
1742static void debugfs_add_domain_dir(struct irq_domain *d) 1731static void debugfs_add_domain_dir(struct irq_domain *d)
1743{ 1732{
1744 if (!d->name || !domain_dir || d->debugfs_file) 1733 if (!d->name || !domain_dir || d->debugfs_file)
1745 return; 1734 return;
1746 d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d, 1735 d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d,
1747 &dfs_domain_ops); 1736 &irq_domain_debug_fops);
1748} 1737}
1749 1738
1750static void debugfs_remove_domain_dir(struct irq_domain *d) 1739static void debugfs_remove_domain_dir(struct irq_domain *d)
@@ -1760,7 +1749,8 @@ void __init irq_domain_debugfs_init(struct dentry *root)
1760 if (!domain_dir) 1749 if (!domain_dir)
1761 return; 1750 return;
1762 1751
1763 debugfs_create_file("default", 0444, domain_dir, NULL, &dfs_domain_ops); 1752 debugfs_create_file("default", 0444, domain_dir, NULL,
1753 &irq_domain_debug_fops);
1764 mutex_lock(&irq_domain_mutex); 1754 mutex_lock(&irq_domain_mutex);
1765 list_for_each_entry(d, &irq_domain_list, link) 1755 list_for_each_entry(d, &irq_domain_list, link)
1766 debugfs_add_domain_dir(d); 1756 debugfs_add_domain_dir(d);
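DEFINE_SHOW_ATTRIBUTE(irq_domain_debug) from <linux/seq_file.h> expands to roughly the boilerplate removed above (plus an .owner field the open-coded version lacked):

static int irq_domain_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, irq_domain_debug_show, inode->i_private);
}

static const struct file_operations irq_domain_debug_fops = {
	.owner	 = THIS_MODULE,
	.open	 = irq_domain_debug_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};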
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index da2ccf142358..102160ff5c66 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -978,67 +978,90 @@ static int prepare_kprobe(struct kprobe *p)
978} 978}
979 979
980/* Caller must lock kprobe_mutex */ 980/* Caller must lock kprobe_mutex */
981static void arm_kprobe_ftrace(struct kprobe *p) 981static int arm_kprobe_ftrace(struct kprobe *p)
982{ 982{
983 int ret; 983 int ret = 0;
984 984
985 ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, 985 ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
986 (unsigned long)p->addr, 0, 0); 986 (unsigned long)p->addr, 0, 0);
987 WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret); 987 if (ret) {
988 kprobe_ftrace_enabled++; 988 pr_debug("Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
989 if (kprobe_ftrace_enabled == 1) { 989 return ret;
990 }
991
992 if (kprobe_ftrace_enabled == 0) {
990 ret = register_ftrace_function(&kprobe_ftrace_ops); 993 ret = register_ftrace_function(&kprobe_ftrace_ops);
991 WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret); 994 if (ret) {
995 pr_debug("Failed to init kprobe-ftrace (%d)\n", ret);
996 goto err_ftrace;
997 }
992 } 998 }
999
1000 kprobe_ftrace_enabled++;
1001 return ret;
1002
1003err_ftrace:
1004 /*
1005 * Note: Since kprobe_ftrace_ops has IPMODIFY set, and ftrace requires a
1006 * non-empty filter_hash for IPMODIFY ops, we're safe from an accidental
1007 * empty filter_hash which would undesirably trace all functions.
1008 */
1009 ftrace_set_filter_ip(&kprobe_ftrace_ops, (unsigned long)p->addr, 1, 0);
1010 return ret;
993} 1011}
994 1012
995/* Caller must lock kprobe_mutex */ 1013/* Caller must lock kprobe_mutex */
996static void disarm_kprobe_ftrace(struct kprobe *p) 1014static int disarm_kprobe_ftrace(struct kprobe *p)
997{ 1015{
998 int ret; 1016 int ret = 0;
999 1017
1000 kprobe_ftrace_enabled--; 1018 if (kprobe_ftrace_enabled == 1) {
1001 if (kprobe_ftrace_enabled == 0) {
1002 ret = unregister_ftrace_function(&kprobe_ftrace_ops); 1019 ret = unregister_ftrace_function(&kprobe_ftrace_ops);
1003 WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret); 1020 if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret))
1021 return ret;
1004 } 1022 }
1023
1024 kprobe_ftrace_enabled--;
1025
1005 ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, 1026 ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
1006 (unsigned long)p->addr, 1, 0); 1027 (unsigned long)p->addr, 1, 0);
1007 WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret); 1028 WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
1029 return ret;
1008} 1030}
1009#else /* !CONFIG_KPROBES_ON_FTRACE */ 1031#else /* !CONFIG_KPROBES_ON_FTRACE */
1010#define prepare_kprobe(p) arch_prepare_kprobe(p) 1032#define prepare_kprobe(p) arch_prepare_kprobe(p)
1011#define arm_kprobe_ftrace(p) do {} while (0) 1033#define arm_kprobe_ftrace(p) (-ENODEV)
1012#define disarm_kprobe_ftrace(p) do {} while (0) 1034#define disarm_kprobe_ftrace(p) (-ENODEV)
1013#endif 1035#endif
1014 1036
1015/* Arm a kprobe with text_mutex */ 1037/* Arm a kprobe with text_mutex */
1016static void arm_kprobe(struct kprobe *kp) 1038static int arm_kprobe(struct kprobe *kp)
1017{ 1039{
1018 if (unlikely(kprobe_ftrace(kp))) { 1040 if (unlikely(kprobe_ftrace(kp)))
1019 arm_kprobe_ftrace(kp); 1041 return arm_kprobe_ftrace(kp);
1020 return; 1042
1021 }
1022 cpus_read_lock(); 1043 cpus_read_lock();
1023 mutex_lock(&text_mutex); 1044 mutex_lock(&text_mutex);
1024 __arm_kprobe(kp); 1045 __arm_kprobe(kp);
1025 mutex_unlock(&text_mutex); 1046 mutex_unlock(&text_mutex);
1026 cpus_read_unlock(); 1047 cpus_read_unlock();
1048
1049 return 0;
1027} 1050}
1028 1051
1029/* Disarm a kprobe with text_mutex */ 1052/* Disarm a kprobe with text_mutex */
1030static void disarm_kprobe(struct kprobe *kp, bool reopt) 1053static int disarm_kprobe(struct kprobe *kp, bool reopt)
1031{ 1054{
1032 if (unlikely(kprobe_ftrace(kp))) { 1055 if (unlikely(kprobe_ftrace(kp)))
1033 disarm_kprobe_ftrace(kp); 1056 return disarm_kprobe_ftrace(kp);
1034 return;
1035 }
1036 1057
1037 cpus_read_lock(); 1058 cpus_read_lock();
1038 mutex_lock(&text_mutex); 1059 mutex_lock(&text_mutex);
1039 __disarm_kprobe(kp, reopt); 1060 __disarm_kprobe(kp, reopt);
1040 mutex_unlock(&text_mutex); 1061 mutex_unlock(&text_mutex);
1041 cpus_read_unlock(); 1062 cpus_read_unlock();
1063
1064 return 0;
1042} 1065}
1043 1066
1044/* 1067/*
@@ -1362,9 +1385,15 @@ out:
1362 1385
1363 if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) { 1386 if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
1364 ap->flags &= ~KPROBE_FLAG_DISABLED; 1387 ap->flags &= ~KPROBE_FLAG_DISABLED;
1365 if (!kprobes_all_disarmed) 1388 if (!kprobes_all_disarmed) {
1366 /* Arm the breakpoint again. */ 1389 /* Arm the breakpoint again. */
1367 arm_kprobe(ap); 1390 ret = arm_kprobe(ap);
1391 if (ret) {
1392 ap->flags |= KPROBE_FLAG_DISABLED;
1393 list_del_rcu(&p->list);
1394 synchronize_sched();
1395 }
1396 }
1368 } 1397 }
1369 return ret; 1398 return ret;
1370} 1399}
@@ -1573,8 +1602,14 @@ int register_kprobe(struct kprobe *p)
1573 hlist_add_head_rcu(&p->hlist, 1602 hlist_add_head_rcu(&p->hlist,
1574 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); 1603 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
1575 1604
1576 if (!kprobes_all_disarmed && !kprobe_disabled(p)) 1605 if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
1577 arm_kprobe(p); 1606 ret = arm_kprobe(p);
1607 if (ret) {
1608 hlist_del_rcu(&p->hlist);
1609 synchronize_sched();
1610 goto out;
1611 }
1612 }
1578 1613
1579 /* Try to optimize kprobe */ 1614 /* Try to optimize kprobe */
1580 try_to_optimize_kprobe(p); 1615 try_to_optimize_kprobe(p);
@@ -1608,11 +1643,12 @@ static int aggr_kprobe_disabled(struct kprobe *ap)
1608static struct kprobe *__disable_kprobe(struct kprobe *p) 1643static struct kprobe *__disable_kprobe(struct kprobe *p)
1609{ 1644{
1610 struct kprobe *orig_p; 1645 struct kprobe *orig_p;
1646 int ret;
1611 1647
1612 /* Get an original kprobe for return */ 1648 /* Get an original kprobe for return */
1613 orig_p = __get_valid_kprobe(p); 1649 orig_p = __get_valid_kprobe(p);
1614 if (unlikely(orig_p == NULL)) 1650 if (unlikely(orig_p == NULL))
1615 return NULL; 1651 return ERR_PTR(-EINVAL);
1616 1652
1617 if (!kprobe_disabled(p)) { 1653 if (!kprobe_disabled(p)) {
1618 /* Disable probe if it is a child probe */ 1654 /* Disable probe if it is a child probe */
@@ -1626,8 +1662,13 @@ static struct kprobe *__disable_kprobe(struct kprobe *p)
1626 * should have already been disarmed, so 1662 * should have already been disarmed, so
1627 * skip the unneeded disarming process. 1663 * skip the unneeded disarming process.
1628 */ 1664 */
1629 if (!kprobes_all_disarmed) 1665 if (!kprobes_all_disarmed) {
1630 disarm_kprobe(orig_p, true); 1666 ret = disarm_kprobe(orig_p, true);
1667 if (ret) {
1668 p->flags &= ~KPROBE_FLAG_DISABLED;
1669 return ERR_PTR(ret);
1670 }
1671 }
1631 orig_p->flags |= KPROBE_FLAG_DISABLED; 1672 orig_p->flags |= KPROBE_FLAG_DISABLED;
1632 } 1673 }
1633 } 1674 }
@@ -1644,8 +1685,8 @@ static int __unregister_kprobe_top(struct kprobe *p)
1644 1685
1645 /* Disable kprobe. This will disarm it if needed. */ 1686 /* Disable kprobe. This will disarm it if needed. */
1646 ap = __disable_kprobe(p); 1687 ap = __disable_kprobe(p);
1647 if (ap == NULL) 1688 if (IS_ERR(ap))
1648 return -EINVAL; 1689 return PTR_ERR(ap);
1649 1690
1650 if (ap == p) 1691 if (ap == p)
1651 /* 1692 /*
@@ -2078,12 +2119,14 @@ static void kill_kprobe(struct kprobe *p)
2078int disable_kprobe(struct kprobe *kp) 2119int disable_kprobe(struct kprobe *kp)
2079{ 2120{
2080 int ret = 0; 2121 int ret = 0;
2122 struct kprobe *p;
2081 2123
2082 mutex_lock(&kprobe_mutex); 2124 mutex_lock(&kprobe_mutex);
2083 2125
2084 /* Disable this kprobe */ 2126 /* Disable this kprobe */
2085 if (__disable_kprobe(kp) == NULL) 2127 p = __disable_kprobe(kp);
2086 ret = -EINVAL; 2128 if (IS_ERR(p))
2129 ret = PTR_ERR(p);
2087 2130
2088 mutex_unlock(&kprobe_mutex); 2131 mutex_unlock(&kprobe_mutex);
2089 return ret; 2132 return ret;
@@ -2116,7 +2159,9 @@ int enable_kprobe(struct kprobe *kp)
2116 2159
2117 if (!kprobes_all_disarmed && kprobe_disabled(p)) { 2160 if (!kprobes_all_disarmed && kprobe_disabled(p)) {
2118 p->flags &= ~KPROBE_FLAG_DISABLED; 2161 p->flags &= ~KPROBE_FLAG_DISABLED;
2119 arm_kprobe(p); 2162 ret = arm_kprobe(p);
2163 if (ret)
2164 p->flags |= KPROBE_FLAG_DISABLED;
2120 } 2165 }
2121out: 2166out:
2122 mutex_unlock(&kprobe_mutex); 2167 mutex_unlock(&kprobe_mutex);
@@ -2407,11 +2452,12 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
2407 .release = seq_release, 2452 .release = seq_release,
2408}; 2453};
2409 2454
2410static void arm_all_kprobes(void) 2455static int arm_all_kprobes(void)
2411{ 2456{
2412 struct hlist_head *head; 2457 struct hlist_head *head;
2413 struct kprobe *p; 2458 struct kprobe *p;
2414 unsigned int i; 2459 unsigned int i, total = 0, errors = 0;
2460 int err, ret = 0;
2415 2461
2416 mutex_lock(&kprobe_mutex); 2462 mutex_lock(&kprobe_mutex);
2417 2463
@@ -2428,46 +2474,74 @@ static void arm_all_kprobes(void)
2428 /* Arming kprobes doesn't optimize kprobe itself */ 2474 /* Arming kprobes doesn't optimize kprobe itself */
2429 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2475 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2430 head = &kprobe_table[i]; 2476 head = &kprobe_table[i];
2431 hlist_for_each_entry_rcu(p, head, hlist) 2477 /* Arm all kprobes on a best-effort basis */
2432 if (!kprobe_disabled(p)) 2478 hlist_for_each_entry_rcu(p, head, hlist) {
2433 arm_kprobe(p); 2479 if (!kprobe_disabled(p)) {
2480 err = arm_kprobe(p);
2481 if (err) {
2482 errors++;
2483 ret = err;
2484 }
2485 total++;
2486 }
2487 }
2434 } 2488 }
2435 2489
2436 printk(KERN_INFO "Kprobes globally enabled\n"); 2490 if (errors)
2491 pr_warn("Kprobes globally enabled, but failed to arm %d out of %d probes\n",
2492 errors, total);
2493 else
2494 pr_info("Kprobes globally enabled\n");
2437 2495
2438already_enabled: 2496already_enabled:
2439 mutex_unlock(&kprobe_mutex); 2497 mutex_unlock(&kprobe_mutex);
2440 return; 2498 return ret;
2441} 2499}
2442 2500
2443static void disarm_all_kprobes(void) 2501static int disarm_all_kprobes(void)
2444{ 2502{
2445 struct hlist_head *head; 2503 struct hlist_head *head;
2446 struct kprobe *p; 2504 struct kprobe *p;
2447 unsigned int i; 2505 unsigned int i, total = 0, errors = 0;
2506 int err, ret = 0;
2448 2507
2449 mutex_lock(&kprobe_mutex); 2508 mutex_lock(&kprobe_mutex);
2450 2509
2451 /* If kprobes are already disarmed, just return */ 2510 /* If kprobes are already disarmed, just return */
2452 if (kprobes_all_disarmed) { 2511 if (kprobes_all_disarmed) {
2453 mutex_unlock(&kprobe_mutex); 2512 mutex_unlock(&kprobe_mutex);
2454 return; 2513 return 0;
2455 } 2514 }
2456 2515
2457 kprobes_all_disarmed = true; 2516 kprobes_all_disarmed = true;
2458 printk(KERN_INFO "Kprobes globally disabled\n");
2459 2517
2460 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2518 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2461 head = &kprobe_table[i]; 2519 head = &kprobe_table[i];
2520 /* Disarm all kprobes on a best-effort basis */
2462 hlist_for_each_entry_rcu(p, head, hlist) { 2521 hlist_for_each_entry_rcu(p, head, hlist) {
2463 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) 2522 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
2464 disarm_kprobe(p, false); 2523 err = disarm_kprobe(p, false);
2524 if (err) {
2525 errors++;
2526 ret = err;
2527 }
2528 total++;
2529 }
2465 } 2530 }
2466 } 2531 }
2532
2533 if (errors)
2534 pr_warn("Kprobes globally disabled, but failed to disarm %d out of %d probes\n",
2535 errors, total);
2536 else
2537 pr_info("Kprobes globally disabled\n");
2538
2467 mutex_unlock(&kprobe_mutex); 2539 mutex_unlock(&kprobe_mutex);
2468 2540
2469 /* Wait for disarming all kprobes by optimizer */ 2541 /* Wait for disarming all kprobes by optimizer */
2470 wait_for_kprobe_optimizer(); 2542 wait_for_kprobe_optimizer();
2543
2544 return ret;
2471} 2545}
2472 2546
2473/* 2547/*
@@ -2494,6 +2568,7 @@ static ssize_t write_enabled_file_bool(struct file *file,
2494{ 2568{
2495 char buf[32]; 2569 char buf[32];
2496 size_t buf_size; 2570 size_t buf_size;
2571 int ret = 0;
2497 2572
2498 buf_size = min(count, (sizeof(buf)-1)); 2573 buf_size = min(count, (sizeof(buf)-1));
2499 if (copy_from_user(buf, user_buf, buf_size)) 2574 if (copy_from_user(buf, user_buf, buf_size))
@@ -2504,17 +2579,20 @@ static ssize_t write_enabled_file_bool(struct file *file,
2504 case 'y': 2579 case 'y':
2505 case 'Y': 2580 case 'Y':
2506 case '1': 2581 case '1':
2507 arm_all_kprobes(); 2582 ret = arm_all_kprobes();
2508 break; 2583 break;
2509 case 'n': 2584 case 'n':
2510 case 'N': 2585 case 'N':
2511 case '0': 2586 case '0':
2512 disarm_all_kprobes(); 2587 ret = disarm_all_kprobes();
2513 break; 2588 break;
2514 default: 2589 default:
2515 return -EINVAL; 2590 return -EINVAL;
2516 } 2591 }
2517 2592
2593 if (ret)
2594 return ret;
2595
2518 return count; 2596 return count;
2519} 2597}
2520 2598
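Since arming can now fail, register_kprobe() and enable_kprobe() may return errors that the old void helpers swallowed. An illustrative module-side check (the symbol name is only an example):

static struct kprobe kp = {
	.symbol_name = "do_sys_open",
};

static int __init foo_probe_init(void)
{
	int ret = register_kprobe(&kp);

	if (ret < 0)	/* now also covers arm failures */
		pr_err("register_kprobe failed: %d\n", ret);
	return ret;
}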
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 38ece035039e..d880296245c5 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -379,6 +379,14 @@ queue:
379 tail = encode_tail(smp_processor_id(), idx); 379 tail = encode_tail(smp_processor_id(), idx);
380 380
381 node += idx; 381 node += idx;
382
383 /*
384 * Ensure that we increment the head node->count before initialising
385 * the actual node. If the compiler is kind enough to reorder these
386 * stores, then an IRQ could overwrite our assignments.
387 */
388 barrier();
389
382 node->locked = 0; 390 node->locked = 0;
383 node->next = NULL; 391 node->next = NULL;
384 pv_init_node(node); 392 pv_init_node(node);
@@ -408,14 +416,15 @@ queue:
408 */ 416 */
409 if (old & _Q_TAIL_MASK) { 417 if (old & _Q_TAIL_MASK) {
410 prev = decode_tail(old); 418 prev = decode_tail(old);
419
411 /* 420 /*
412 * The above xchg_tail() is also a load of @lock which 421 * We must ensure that the stores to @node are observed before
413 * generates, through decode_tail(), a pointer. The address 422 * the write to prev->next. The address dependency from
414 * dependency matches the RELEASE of xchg_tail() such that 423 * xchg_tail is not sufficient to ensure this because the read
415 * the subsequent access to @prev happens after. 424 * component of xchg_tail is unordered with respect to the
425 * initialisation of @node.
416 */ 426 */
417 427 smp_store_release(&prev->next, node);
418 WRITE_ONCE(prev->next, node);
419 428
420 pv_wait_node(node, prev); 429 pv_wait_node(node, prev);
421 arch_mcs_spin_lock_contended(&node->locked); 430 arch_mcs_spin_lock_contended(&node->locked);
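The smp_store_release() here is the standard publish idiom: fully initialise the node, then publish the pointer with release semantics so a reader that observes prev->next also observes the initialised fields. A generic sketch of the pattern:

struct item { int payload; struct item *next; };

static void publish(struct item **slot, struct item *it)
{
	it->payload = 42;		/* plain initialising stores */
	it->next = NULL;
	smp_store_release(slot, it);	/* ordered after the stores above */
}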
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bf724c1952ea..e7c535eee0a6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2601,19 +2601,31 @@ static inline void finish_task(struct task_struct *prev)
2601#endif 2601#endif
2602} 2602}
2603 2603
2604static inline void finish_lock_switch(struct rq *rq) 2604static inline void
2605prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
2605{ 2606{
2607 /*
2608 * The runqueue lock will be released by the next
2609 * task (which is an invalid locking op but in the case
2610 * of the scheduler it's an obvious special-case), so we
2611 * do an early lockdep release here:
2612 */
2613 rq_unpin_lock(rq, rf);
2614 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
2606#ifdef CONFIG_DEBUG_SPINLOCK 2615#ifdef CONFIG_DEBUG_SPINLOCK
2607 /* this is a valid case when another task releases the spinlock */ 2616 /* this is a valid case when another task releases the spinlock */
2608 rq->lock.owner = current; 2617 rq->lock.owner = next;
2609#endif 2618#endif
2619}
2620
2621static inline void finish_lock_switch(struct rq *rq)
2622{
2610 /* 2623 /*
2611 * If we are tracking spinlock dependencies then we have to 2624 * If we are tracking spinlock dependencies then we have to
2612 * fix up the runqueue lock - which gets 'carried over' from 2625 * fix up the runqueue lock - which gets 'carried over' from
2613 * prev into current: 2626 * prev into current:
2614 */ 2627 */
2615 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); 2628 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
2616
2617 raw_spin_unlock_irq(&rq->lock); 2629 raw_spin_unlock_irq(&rq->lock);
2618} 2630}
2619 2631
@@ -2844,14 +2856,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
2844 2856
2845 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); 2857 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
2846 2858
2847 /* 2859 prepare_lock_switch(rq, next, rf);
2848 * Since the runqueue lock will be released by the next
2849 * task (which is an invalid locking op but in the case
2850 * of the scheduler it's an obvious special-case), so we
2851 * do an early lockdep release here:
2852 */
2853 rq_unpin_lock(rq, rf);
2854 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
2855 2860
2856 /* Here we just switch the register state and the stack. */ 2861 /* Here we just switch the register state and the stack. */
2857 switch_to(prev, next, prev); 2862 switch_to(prev, next, prev);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index dd062a1c8cf0..7936f548e071 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -19,8 +19,6 @@
19 19
20#include "sched.h" 20#include "sched.h"
21 21
22#define SUGOV_KTHREAD_PRIORITY 50
23
24struct sugov_tunables { 22struct sugov_tunables {
25 struct gov_attr_set attr_set; 23 struct gov_attr_set attr_set;
26 unsigned int rate_limit_us; 24 unsigned int rate_limit_us;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 9bb0e0c412ec..9df09782025c 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1153,6 +1153,7 @@ static void update_curr_dl(struct rq *rq)
1153 struct sched_dl_entity *dl_se = &curr->dl; 1153 struct sched_dl_entity *dl_se = &curr->dl;
1154 u64 delta_exec, scaled_delta_exec; 1154 u64 delta_exec, scaled_delta_exec;
1155 int cpu = cpu_of(rq); 1155 int cpu = cpu_of(rq);
1156 u64 now;
1156 1157
1157 if (!dl_task(curr) || !on_dl_rq(dl_se)) 1158 if (!dl_task(curr) || !on_dl_rq(dl_se))
1158 return; 1159 return;
@@ -1165,7 +1166,8 @@ static void update_curr_dl(struct rq *rq)
1165 * natural solution, but the full ramifications of this 1166 * natural solution, but the full ramifications of this
1166 * approach need further study. 1167 * approach need further study.
1167 */ 1168 */
1168 delta_exec = rq_clock_task(rq) - curr->se.exec_start; 1169 now = rq_clock_task(rq);
1170 delta_exec = now - curr->se.exec_start;
1169 if (unlikely((s64)delta_exec <= 0)) { 1171 if (unlikely((s64)delta_exec <= 0)) {
1170 if (unlikely(dl_se->dl_yielded)) 1172 if (unlikely(dl_se->dl_yielded))
1171 goto throttle; 1173 goto throttle;
@@ -1178,7 +1180,7 @@ static void update_curr_dl(struct rq *rq)
1178 curr->se.sum_exec_runtime += delta_exec; 1180 curr->se.sum_exec_runtime += delta_exec;
1179 account_group_exec_runtime(curr, delta_exec); 1181 account_group_exec_runtime(curr, delta_exec);
1180 1182
1181 curr->se.exec_start = rq_clock_task(rq); 1183 curr->se.exec_start = now;
1182 cgroup_account_cputime(curr, delta_exec); 1184 cgroup_account_cputime(curr, delta_exec);
1183 1185
1184 sched_rt_avg_update(rq, delta_exec); 1186 sched_rt_avg_update(rq, delta_exec);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 663b2355a3aa..aad49451584e 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -950,12 +950,13 @@ static void update_curr_rt(struct rq *rq)
950{ 950{
951 struct task_struct *curr = rq->curr; 951 struct task_struct *curr = rq->curr;
952 struct sched_rt_entity *rt_se = &curr->rt; 952 struct sched_rt_entity *rt_se = &curr->rt;
953 u64 now = rq_clock_task(rq);
954 u64 delta_exec; 953 u64 delta_exec;
954 u64 now;
955 955
956 if (curr->sched_class != &rt_sched_class) 956 if (curr->sched_class != &rt_sched_class)
957 return; 957 return;
958 958
959 now = rq_clock_task(rq);
959 delta_exec = now - curr->se.exec_start; 960 delta_exec = now - curr->se.exec_start;
960 if (unlikely((s64)delta_exec <= 0)) 961 if (unlikely((s64)delta_exec <= 0))
961 return; 962 return;
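The deadline and rt hunks apply the same read-once pattern: sample rq_clock_task() a single time so delta_exec and the refreshed exec_start derive from the same timestamp, rather than silently dropping the time between two separate clock reads. Condensed:

u64 now = rq_clock_task(rq);		/* sampled once */
s64 delta_exec = now - curr->se.exec_start;

if (delta_exec > 0) {
	curr->se.sum_exec_runtime += delta_exec;
	curr->se.exec_start = now;	/* same timestamp as the delta */
}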
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
index 40b1f92f2214..c9e8e21cb334 100644
--- a/lib/dma-direct.c
+++ b/lib/dma-direct.c
@@ -84,6 +84,10 @@ again:
84 return page_address(page); 84 return page_address(page);
85} 85}
86 86
87/*
88 * NOTE: this function must never look at the dma_addr argument, because we want
89 * to be able to use it as a helper for iommu implementations as well.
90 */
87void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, 91void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
88 dma_addr_t dma_addr, unsigned long attrs) 92 dma_addr_t dma_addr, unsigned long attrs)
89{ 93{
@@ -152,5 +156,6 @@ const struct dma_map_ops dma_direct_ops = {
152 .map_sg = dma_direct_map_sg, 156 .map_sg = dma_direct_map_sg,
153 .dma_supported = dma_direct_supported, 157 .dma_supported = dma_direct_supported,
154 .mapping_error = dma_direct_mapping_error, 158 .mapping_error = dma_direct_mapping_error,
159 .is_phys = 1,
155}; 160};
156EXPORT_SYMBOL(dma_direct_ops); 161EXPORT_SYMBOL(dma_direct_ops);
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 9fe6ec8fda28..9539d7ab3ea8 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -650,6 +650,7 @@ found:
650static struct pernet_operations uevent_net_ops = { 650static struct pernet_operations uevent_net_ops = {
651 .init = uevent_net_init, 651 .init = uevent_net_init,
652 .exit = uevent_net_exit, 652 .exit = uevent_net_exit,
653 .async = true,
653}; 654};
654 655
655static int __init kobject_uevent_init(void) 656static int __init kobject_uevent_init(void)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 4b80ccee4535..8291b75f42c8 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1139,8 +1139,6 @@ int memory_failure(unsigned long pfn, int flags)
1139 return 0; 1139 return 0;
1140 } 1140 }
1141 1141
1142 arch_unmap_kpfn(pfn);
1143
1144 orig_head = hpage = compound_head(p); 1142 orig_head = hpage = compound_head(p);
1145 num_poisoned_pages_inc(); 1143 num_poisoned_pages_inc();
1146 1144
diff --git a/mm/memory.c b/mm/memory.c
index dd8de96f5547..5fcfc24904d1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -80,7 +80,7 @@
80 80
81#include "internal.h" 81#include "internal.h"
82 82
83#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS 83#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST)
84#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. 84#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
85#endif 85#endif
86 86
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index f3a4efcf1456..3aa5a93ad107 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -160,7 +160,8 @@ static void req_done(struct virtqueue *vq)
160 spin_unlock_irqrestore(&chan->lock, flags); 160 spin_unlock_irqrestore(&chan->lock, flags);
161 /* Wakeup if anyone waiting for VirtIO ring space. */ 161 /* Wakeup if anyone waiting for VirtIO ring space. */
162 wake_up(chan->vc_wq); 162 wake_up(chan->vc_wq);
163 p9_client_cb(chan->client, req, REQ_STATUS_RCVD); 163 if (len)
164 p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
164 } 165 }
165} 166}
166 167
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 03a9fc0771c0..9b6bc5abe946 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1238,7 +1238,7 @@ out:
1238 * fields into the sockaddr. 1238 * fields into the sockaddr.
1239 */ 1239 */
1240static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, 1240static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
1241 int *uaddr_len, int peer) 1241 int peer)
1242{ 1242{
1243 struct sockaddr_at sat; 1243 struct sockaddr_at sat;
1244 struct sock *sk = sock->sk; 1244 struct sock *sk = sock->sk;
@@ -1251,7 +1251,6 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
1251 if (atalk_autobind(sk) < 0) 1251 if (atalk_autobind(sk) < 0)
1252 goto out; 1252 goto out;
1253 1253
1254 *uaddr_len = sizeof(struct sockaddr_at);
1255 memset(&sat, 0, sizeof(sat)); 1254 memset(&sat, 0, sizeof(sat));
1256 1255
1257 if (peer) { 1256 if (peer) {
@@ -1268,9 +1267,9 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
1268 sat.sat_port = at->src_port; 1267 sat.sat_port = at->src_port;
1269 } 1268 }
1270 1269
1271 err = 0;
1272 sat.sat_family = AF_APPLETALK; 1270 sat.sat_family = AF_APPLETALK;
1273 memcpy(uaddr, &sat, sizeof(sat)); 1271 memcpy(uaddr, &sat, sizeof(sat));
1272 err = sizeof(struct sockaddr_at);
1274 1273
1275out: 1274out:
1276 release_sock(sk); 1275 release_sock(sk);
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index e1140b3bdcaa..2cb10af16afc 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -87,21 +87,20 @@ static int pvc_getsockopt(struct socket *sock, int level, int optname,
87} 87}
88 88
89static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr, 89static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
90 int *sockaddr_len, int peer) 90 int peer)
91{ 91{
92 struct sockaddr_atmpvc *addr; 92 struct sockaddr_atmpvc *addr;
93 struct atm_vcc *vcc = ATM_SD(sock); 93 struct atm_vcc *vcc = ATM_SD(sock);
94 94
95 if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) 95 if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
96 return -ENOTCONN; 96 return -ENOTCONN;
97 *sockaddr_len = sizeof(struct sockaddr_atmpvc);
98 addr = (struct sockaddr_atmpvc *)sockaddr; 97 addr = (struct sockaddr_atmpvc *)sockaddr;
99 memset(addr, 0, sizeof(*addr)); 98 memset(addr, 0, sizeof(*addr));
100 addr->sap_family = AF_ATMPVC; 99 addr->sap_family = AF_ATMPVC;
101 addr->sap_addr.itf = vcc->dev->number; 100 addr->sap_addr.itf = vcc->dev->number;
102 addr->sap_addr.vpi = vcc->vpi; 101 addr->sap_addr.vpi = vcc->vpi;
103 addr->sap_addr.vci = vcc->vci; 102 addr->sap_addr.vci = vcc->vci;
104 return 0; 103 return sizeof(struct sockaddr_atmpvc);
105} 104}
106 105
107static const struct proto_ops pvc_proto_ops = { 106static const struct proto_ops pvc_proto_ops = {
diff --git a/net/atm/svc.c b/net/atm/svc.c
index c458adcbc177..2f91b766ac42 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -419,15 +419,14 @@ out:
419} 419}
420 420
421static int svc_getname(struct socket *sock, struct sockaddr *sockaddr, 421static int svc_getname(struct socket *sock, struct sockaddr *sockaddr,
422 int *sockaddr_len, int peer) 422 int peer)
423{ 423{
424 struct sockaddr_atmsvc *addr; 424 struct sockaddr_atmsvc *addr;
425 425
426 *sockaddr_len = sizeof(struct sockaddr_atmsvc);
427 addr = (struct sockaddr_atmsvc *) sockaddr; 426 addr = (struct sockaddr_atmsvc *) sockaddr;
428 memcpy(addr, peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local, 427 memcpy(addr, peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local,
429 sizeof(struct sockaddr_atmsvc)); 428 sizeof(struct sockaddr_atmsvc));
430 return 0; 429 return sizeof(struct sockaddr_atmsvc);
431} 430}
432 431
433int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos) 432int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 47fdd399626b..c8319ed48485 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1388,7 +1388,7 @@ out:
1388} 1388}
1389 1389
1390static int ax25_getname(struct socket *sock, struct sockaddr *uaddr, 1390static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
1391 int *uaddr_len, int peer) 1391 int peer)
1392{ 1392{
1393 struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr; 1393 struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
1394 struct sock *sk = sock->sk; 1394 struct sock *sk = sock->sk;
@@ -1427,7 +1427,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
1427 fsa->fsa_digipeater[0] = null_ax25_address; 1427 fsa->fsa_digipeater[0] = null_ax25_address;
1428 } 1428 }
1429 } 1429 }
1430 *uaddr_len = sizeof (struct full_sockaddr_ax25); 1430 err = sizeof (struct full_sockaddr_ax25);
1431 1431
1432out: 1432out:
1433 release_sock(sk); 1433 release_sock(sk);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 3394e6791673..66c0781773df 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -934,8 +934,8 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
934 /* Slave connection state and connectable mode bit 38 934 /* Slave connection state and connectable mode bit 38
935 * and scannable bit 21. 935 * and scannable bit 21.
936 */ 936 */
937 if (connectable && (!(hdev->le_states[4] & 0x01) || 937 if (connectable && (!(hdev->le_states[4] & 0x40) ||
938 !(hdev->le_states[2] & 0x40))) 938 !(hdev->le_states[2] & 0x20)))
939 return false; 939 return false;
940 } 940 }
941 941
@@ -948,7 +948,7 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
948 /* Master connection state and connectable mode bit 35 and 948 /* Master connection state and connectable mode bit 35 and
949 * scannable 19. 949 * scannable 19.
950 */ 950 */
951 if (connectable && (!(hdev->le_states[4] & 0x10) || 951 if (connectable && (!(hdev->le_states[4] & 0x08) ||
952 !(hdev->le_states[2] & 0x08))) 952 !(hdev->le_states[2] & 0x08)))
953 return false; 953 return false;
954 } 954 }
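The corrected masks follow from byte-wise indexing of the LE supported-states bitmap: bit n lives in byte n / 8 under mask 1 << (n % 8). So bit 38 is le_states[4] & 0x40, bit 21 is le_states[2] & 0x20, bit 35 is le_states[4] & 0x08 and bit 19 is le_states[2] & 0x08; the old constants tested neighbouring bits. As helper macros (illustrative, not from the patch):

#define LE_STATE_BYTE(n)	((n) / 8)
#define LE_STATE_MASK(n)	(1 << ((n) % 8))
/* LE_STATE_BYTE(38) == 4, LE_STATE_MASK(38) == 0x40 */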
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 923e9a271872..1506e1632394 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1340,7 +1340,7 @@ done:
1340} 1340}
1341 1341
1342static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, 1342static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
1343 int *addr_len, int peer) 1343 int peer)
1344{ 1344{
1345 struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr; 1345 struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr;
1346 struct sock *sk = sock->sk; 1346 struct sock *sk = sock->sk;
@@ -1360,10 +1360,10 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
1360 goto done; 1360 goto done;
1361 } 1361 }
1362 1362
1363 *addr_len = sizeof(*haddr);
1364 haddr->hci_family = AF_BLUETOOTH; 1363 haddr->hci_family = AF_BLUETOOTH;
1365 haddr->hci_dev = hdev->id; 1364 haddr->hci_dev = hdev->id;
1366 haddr->hci_channel= hci_pi(sk)->channel; 1365 haddr->hci_channel= hci_pi(sk)->channel;
1366 err = sizeof(*haddr);
1367 1367
1368done: 1368done:
1369 release_sock(sk); 1369 release_sock(sk);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 67a8642f57ea..686bdc6b35b0 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -358,7 +358,7 @@ done:
358} 358}
359 359
360static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, 360static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
361 int *len, int peer) 361 int peer)
362{ 362{
363 struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr; 363 struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
364 struct sock *sk = sock->sk; 364 struct sock *sk = sock->sk;
@@ -373,7 +373,6 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
373 373
374 memset(la, 0, sizeof(struct sockaddr_l2)); 374 memset(la, 0, sizeof(struct sockaddr_l2));
375 addr->sa_family = AF_BLUETOOTH; 375 addr->sa_family = AF_BLUETOOTH;
376 *len = sizeof(struct sockaddr_l2);
377 376
378 la->l2_psm = chan->psm; 377 la->l2_psm = chan->psm;
379 378
@@ -387,7 +386,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
387 la->l2_bdaddr_type = chan->src_type; 386 la->l2_bdaddr_type = chan->src_type;
388 } 387 }
389 388
390 return 0; 389 return sizeof(struct sockaddr_l2);
391} 390}
392 391
393static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, 392static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 1aaccf637479..93a3b219db09 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -533,7 +533,7 @@ done:
 	return err;
 }
 
-static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int peer)
 {
 	struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
 	struct sock *sk = sock->sk;
@@ -552,8 +552,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
 	else
 		bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->src);
 
-	*len = sizeof(struct sockaddr_rc);
-	return 0;
+	return sizeof(struct sockaddr_rc);
 }
 
 static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 08df57665e1f..413b8ee49fec 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -680,7 +680,7 @@ done:
 }
 
 static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
-			    int *len, int peer)
+			    int peer)
 {
 	struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
 	struct sock *sk = sock->sk;
@@ -688,14 +688,13 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
 	BT_DBG("sock %p, sk %p", sock, sk);
 
 	addr->sa_family = AF_BLUETOOTH;
-	*len = sizeof(struct sockaddr_sco);
 
 	if (peer)
 		bacpy(&sa->sco_bdaddr, &sco_pi(sk)->dst);
 	else
 		bacpy(&sa->sco_bdaddr, &sco_pi(sk)->src);
 
-	return 0;
+	return sizeof(struct sockaddr_sco);
 }
 
 static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 0254c35b2bf0..126a8ea73c96 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -255,6 +255,9 @@ static ssize_t brport_show(struct kobject *kobj,
 	struct brport_attribute *brport_attr = to_brport_attr(attr);
 	struct net_bridge_port *p = to_brport(kobj);
 
+	if (!brport_attr->show)
+		return -EINVAL;
+
 	return brport_attr->show(p, buf);
 }
 
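The added guard closes a NULL dereference: a bridge-port attribute may implement only store(), so the indirect call through ->show must be checked first. The pattern in isolation, with illustrative names that are not the kernel's:

    #include <errno.h>
    #include <stddef.h>

    /* An attribute may implement only one of show/store, so the
     * optional function pointer must be NULL-checked before the call. */
    struct attr {
            int (*show)(char *buf);
    };

    static int safe_show(const struct attr *a, char *buf)
    {
            if (!a->show)
                    return -EINVAL; /* write-only attribute */
            return a->show(buf);
    }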
diff --git a/net/can/raw.c b/net/can/raw.c
index f2ecc43376a1..1051eee82581 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -470,7 +470,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 }
 
 static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
-		       int *len, int peer)
+		       int peer)
 {
 	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
 	struct sock *sk = sock->sk;
@@ -483,9 +483,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
 	addr->can_family  = AF_CAN;
 	addr->can_ifindex = ro->ifindex;
 
-	*len = sizeof(*addr);
-
-	return 0;
+	return sizeof(*addr);
 }
 
 static int raw_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/core/dev.c b/net/core/dev.c
index dda9d7b9a840..5bdcc5a161fe 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2382,8 +2382,11 @@ EXPORT_SYMBOL(netdev_set_num_tc);
  */
 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 {
+	bool disabling;
 	int rc;
 
+	disabling = txq < dev->real_num_tx_queues;
+
 	if (txq < 1 || txq > dev->num_tx_queues)
 		return -EINVAL;
 
@@ -2399,15 +2402,19 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 		if (dev->num_tc)
 			netif_setup_tc(dev, txq);
 
-		if (txq < dev->real_num_tx_queues) {
+		dev->real_num_tx_queues = txq;
+
+		if (disabling) {
+			synchronize_net();
 			qdisc_reset_all_tx_gt(dev, txq);
 #ifdef CONFIG_XPS
 			netif_reset_xps_queues_gt(dev, txq);
 #endif
 		}
+	} else {
+		dev->real_num_tx_queues = txq;
 	}
 
-	dev->real_num_tx_queues = txq;
 	return 0;
 }
 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
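The reordering above is the point of the hunk: when shrinking, the smaller queue count must be published before synchronize_net(), so that once the grace period ends no reader can still pick a queue at or above txq, and only then are those qdiscs reset. A simplified, compilable sketch of that publish-then-wait ordering; every name here is a stand-in, not a kernel API:

    struct dev_state {
            unsigned int real_num_tx_queues;
    };

    static void wait_for_readers(void) { /* stand-in for synchronize_net() */ }
    static void reset_queues_above(struct dev_state *d, unsigned int txq)
    {
            (void)d; (void)txq;     /* teardown elided in this sketch */
    }

    static void shrink_queues(struct dev_state *d, unsigned int txq)
    {
            d->real_num_tx_queues = txq;    /* 1: publish the new bound     */
            wait_for_readers();             /* 2: wait out in-flight users  */
            reset_queues_above(d, txq);     /* 3: queues >= txq unreachable */
    }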
@@ -8134,8 +8141,9 @@ void netdev_run_todo(void)
 		BUG_ON(!list_empty(&dev->ptype_specific));
 		WARN_ON(rcu_access_pointer(dev->ip_ptr));
 		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+#if IS_ENABLED(CONFIG_DECNET)
 		WARN_ON(dev->dn_ptr);
-
+#endif
 		if (dev->priv_destructor)
 			dev->priv_destructor(dev);
 		if (dev->needs_free_netdev)
@@ -8833,6 +8841,7 @@ static void __net_exit netdev_exit(struct net *net)
 static struct pernet_operations __net_initdata netdev_net_ops = {
 	.init = netdev_init,
 	.exit = netdev_exit,
+	.async = true,
 };
 
 static void __net_exit default_device_exit(struct net *net)
@@ -8933,6 +8942,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
 static struct pernet_operations __net_initdata default_device_ops = {
 	.exit = default_device_exit,
 	.exit_batch = default_device_exit_batch,
+	.async = true,
 };
 
 /*
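The `.async = true` initializers that recur through the rest of this patch opt each pernet_operations into running under the shared side of the new net_sem (see net/core/net_namespace.c below); ops left unmarked bump nr_sync_pernet_ops and keep the old fully serialized behaviour. A simplified mirror of the structure shape, with the layout assumed rather than copied from the headers:

    #include <stdbool.h>

    /* Simplified mirror of struct pernet_operations; only the fields
     * relevant to this series, field meaning from the diff. */
    struct pernet_ops_sketch {
            int  (*init)(void *net);
            void (*exit)(void *net);
            bool async;     /* true: safe under net_sem read-side */
    };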
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 18d385ed8237..88e846779269 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2338,6 +2338,32 @@ out:
 	resource->size_valid = size_valid;
 }
 
+static int
+devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
+			       struct netlink_ext_ack *extack)
+{
+	u64 reminder;
+	int err = 0;
+
+	if (size > resource->size_params->size_max) {
+		NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum");
+		err = -EINVAL;
+	}
+
+	if (size < resource->size_params->size_min) {
+		NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum");
+		err = -EINVAL;
+	}
+
+	div64_u64_rem(size, resource->size_params->size_granularity, &reminder);
+	if (reminder) {
+		NL_SET_ERR_MSG_MOD(extack, "Wrong granularity");
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
 static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
 				       struct genl_info *info)
 {
@@ -2356,12 +2382,8 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
 	if (!resource)
 		return -EINVAL;
 
-	if (!resource->resource_ops->size_validate)
-		return -EINVAL;
-
 	size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
-	err = resource->resource_ops->size_validate(devlink, size,
-						    info->extack);
+	err = devlink_resource_validate_size(resource, size, info->extack);
 	if (err)
 		return err;
 
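The new helper centralizes what drivers used to validate themselves: a requested size is acceptable iff it is within [min, max] and a multiple of the granularity. A userspace restatement; the kernel uses div64_u64_rem() so the remainder also works on 32-bit hosts, while plain % is fine in portable C:

    #include <stdint.h>

    /* Restatement of devlink_resource_validate_size() above. */
    static int validate_size(uint64_t size, uint64_t min, uint64_t max,
                             uint64_t granularity)
    {
            if (size > max || size < min)
                    return -1;
            if (granularity && size % granularity)
                    return -1;
            return 0;
    }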
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 0c048bdeb016..5ace0705a3f9 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -171,6 +171,7 @@ static void __net_exit fib_notifier_net_exit(struct net *net)
 static struct pernet_operations fib_notifier_net_ops = {
 	.init = fib_notifier_net_init,
 	.exit = fib_notifier_net_exit,
+	.async = true,
 };
 
 static int __init fib_notifier_init(void)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98e1066c3d55..88298f18cbae 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -51,6 +51,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 	r->pref = pref;
 	r->table = table;
 	r->flags = flags;
+	r->proto = RTPROT_KERNEL;
 	r->fr_net = ops->fro_net;
 	r->uid_range = fib_kuid_range_unset;
 
@@ -465,6 +466,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 	refcount_set(&rule->refcnt, 1);
 	rule->fr_net = net;
+	rule->proto = frh->proto;
 
 	rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
 				      : fib_default_rule_pref(ops);
@@ -664,6 +666,9 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 
 	list_for_each_entry(rule, &ops->rules_list, list) {
+		if (frh->proto && (frh->proto != rule->proto))
+			continue;
+
 		if (frh->action && (frh->action != rule->action))
 			continue;
 
@@ -808,9 +813,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
 		goto nla_put_failure;
 	frh->res1 = 0;
-	frh->res2 = 0;
 	frh->action = rule->action;
 	frh->flags = rule->flags;
+	frh->proto = rule->proto;
 
 	if (rule->action == FR_ACT_GOTO &&
 	    rcu_access_pointer(rule->ctarget) == NULL)
@@ -1030,6 +1035,7 @@ static void __net_exit fib_rules_net_exit(struct net *net)
 static struct pernet_operations fib_rules_net_ops = {
 	.init = fib_rules_net_init,
 	.exit = fib_rules_net_exit,
+	.async = true,
 };
 
 static int __init fib_rules_init(void)
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index e010bb800d7b..65b51e778782 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -349,6 +349,7 @@ static void __net_exit dev_proc_net_exit(struct net *net)
 static struct pernet_operations __net_initdata dev_proc_ops = {
 	.init = dev_proc_net_init,
 	.exit = dev_proc_net_exit,
+	.async = true,
 };
 
 static int dev_mc_seq_show(struct seq_file *seq, void *v)
@@ -405,6 +406,7 @@ static void __net_exit dev_mc_net_exit(struct net *net)
 static struct pernet_operations __net_initdata dev_mc_net_ops = {
 	.init = dev_mc_net_init,
 	.exit = dev_mc_net_exit,
+	.async = true,
 };
 
 int __init dev_proc_init(void)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3cad5f51afd3..27a55236ad64 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -29,7 +29,6 @@
 
 static LIST_HEAD(pernet_list);
 static struct list_head *first_device = &pernet_list;
-DEFINE_MUTEX(net_mutex);
 
 LIST_HEAD(net_namespace_list);
 EXPORT_SYMBOL_GPL(net_namespace_list);
@@ -41,6 +40,12 @@ struct net init_net = {
 EXPORT_SYMBOL(init_net);
 
 static bool init_net_initialized;
+static unsigned nr_sync_pernet_ops;
+/*
+ * net_sem: protects: pernet_list, net_generic_ids, nr_sync_pernet_ops,
+ * init_net_initialized and first_device pointer.
+ */
+DECLARE_RWSEM(net_sem);
 
 #define MIN_PERNET_OPS_ID	\
 	((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -65,11 +70,10 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
 {
 	struct net_generic *ng, *old_ng;
 
-	BUG_ON(!mutex_is_locked(&net_mutex));
 	BUG_ON(id < MIN_PERNET_OPS_ID);
 
 	old_ng = rcu_dereference_protected(net->gen,
-					   lockdep_is_held(&net_mutex));
+					   lockdep_is_held(&net_sem));
 	if (old_ng->s.len > id) {
 		old_ng->ptr[id] = data;
 		return 0;
@@ -286,7 +290,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
  */
 static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 {
-	/* Must be called with net_mutex held */
+	/* Must be called with net_sem held */
 	const struct pernet_operations *ops, *saved_ops;
 	int error = 0;
 	LIST_HEAD(net_exit_list);
@@ -303,6 +307,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 		if (error < 0)
 			goto out_undo;
 	}
+	rtnl_lock();
+	list_add_tail_rcu(&net->list, &net_namespace_list);
+	rtnl_unlock();
 out:
 	return error;
 
@@ -331,6 +338,7 @@ static int __net_init net_defaults_init_net(struct net *net)
 
 static struct pernet_operations net_defaults_ops = {
 	.init = net_defaults_init_net,
+	.async = true,
 };
 
 static __init int net_defaults_init(void)
@@ -397,6 +405,7 @@ struct net *copy_net_ns(unsigned long flags,
 {
 	struct ucounts *ucounts;
 	struct net *net;
+	unsigned write;
 	int rv;
 
 	if (!(flags & CLONE_NEWNET))
@@ -408,32 +417,38 @@ struct net *copy_net_ns(unsigned long flags,
 
 	net = net_alloc();
 	if (!net) {
-		dec_net_namespaces(ucounts);
-		return ERR_PTR(-ENOMEM);
+		rv = -ENOMEM;
+		goto dec_ucounts;
 	}
-
+	refcount_set(&net->passive, 1);
+	net->ucounts = ucounts;
 	get_user_ns(user_ns);
+again:
+	write = READ_ONCE(nr_sync_pernet_ops);
+	if (write)
+		rv = down_write_killable(&net_sem);
+	else
+		rv = down_read_killable(&net_sem);
+	if (rv < 0)
+		goto put_userns;
 
-	rv = mutex_lock_killable(&net_mutex);
-	if (rv < 0) {
-		net_free(net);
-		dec_net_namespaces(ucounts);
-		put_user_ns(user_ns);
-		return ERR_PTR(rv);
+	if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+		up_read(&net_sem);
+		goto again;
 	}
-
-	net->ucounts = ucounts;
 	rv = setup_net(net, user_ns);
-	if (rv == 0) {
-		rtnl_lock();
-		list_add_tail_rcu(&net->list, &net_namespace_list);
-		rtnl_unlock();
-	}
-	mutex_unlock(&net_mutex);
+
+	if (write)
+		up_write(&net_sem);
+	else
+		up_read(&net_sem);
+
 	if (rv < 0) {
-		dec_net_namespaces(ucounts);
+put_userns:
 		put_user_ns(user_ns);
 		net_drop_ns(net);
+dec_ucounts:
+		dec_net_namespaces(ucounts);
 		return ERR_PTR(rv);
 	}
 	return net;
@@ -466,26 +481,33 @@ static void unhash_nsid(struct net *net, struct net *last)
 	spin_unlock_bh(&net->nsid_lock);
 }
 
-static DEFINE_SPINLOCK(cleanup_list_lock);
-static LIST_HEAD(cleanup_list);	/* Must hold cleanup_list_lock to touch */
+static LLIST_HEAD(cleanup_list);
 
 static void cleanup_net(struct work_struct *work)
 {
 	const struct pernet_operations *ops;
 	struct net *net, *tmp, *last;
-	struct list_head net_kill_list;
+	struct llist_node *net_kill_list;
 	LIST_HEAD(net_exit_list);
+	unsigned write;
 
 	/* Atomically snapshot the list of namespaces to cleanup */
-	spin_lock_irq(&cleanup_list_lock);
-	list_replace_init(&cleanup_list, &net_kill_list);
-	spin_unlock_irq(&cleanup_list_lock);
+	net_kill_list = llist_del_all(&cleanup_list);
+again:
+	write = READ_ONCE(nr_sync_pernet_ops);
+	if (write)
+		down_write(&net_sem);
+	else
+		down_read(&net_sem);
 
-	mutex_lock(&net_mutex);
+	if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+		up_read(&net_sem);
+		goto again;
+	}
 
 	/* Don't let anyone else find us. */
 	rtnl_lock();
-	list_for_each_entry(net, &net_kill_list, cleanup_list)
+	llist_for_each_entry(net, net_kill_list, cleanup_list)
 		list_del_rcu(&net->list);
 	/* Cache last net. After we unlock rtnl, no one new net
 	 * added to net_namespace_list can assign nsid pointer
@@ -500,7 +522,7 @@ static void cleanup_net(struct work_struct *work)
 	last = list_last_entry(&net_namespace_list, struct net, list);
 	rtnl_unlock();
 
-	list_for_each_entry(net, &net_kill_list, cleanup_list) {
+	llist_for_each_entry(net, net_kill_list, cleanup_list) {
 		unhash_nsid(net, last);
 		list_add_tail(&net->exit_list, &net_exit_list);
 	}
@@ -520,7 +542,10 @@ static void cleanup_net(struct work_struct *work)
 	list_for_each_entry_reverse(ops, &pernet_list, list)
 		ops_free_list(ops, &net_exit_list);
 
-	mutex_unlock(&net_mutex);
+	if (write)
+		up_write(&net_sem);
+	else
+		up_read(&net_sem);
 
 	/* Ensure there are no outstanding rcu callbacks using this
 	 * network namespace.
@@ -547,8 +572,8 @@ static void cleanup_net(struct work_struct *work)
  */
 void net_ns_barrier(void)
 {
-	mutex_lock(&net_mutex);
-	mutex_unlock(&net_mutex);
+	down_write(&net_sem);
+	up_write(&net_sem);
 }
 EXPORT_SYMBOL(net_ns_barrier);
 
@@ -557,13 +582,8 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
 void __put_net(struct net *net)
 {
 	/* Cleanup the network namespace in process context */
-	unsigned long flags;
-
-	spin_lock_irqsave(&cleanup_list_lock, flags);
-	list_add(&net->cleanup_list, &cleanup_list);
-	spin_unlock_irqrestore(&cleanup_list_lock, flags);
-
-	queue_work(netns_wq, &net_cleanup_work);
+	if (llist_add(&net->cleanup_list, &cleanup_list))
+		queue_work(netns_wq, &net_cleanup_work);
 }
 EXPORT_SYMBOL_GPL(__put_net);
 
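The llist conversion above does two things at once: the push is lock-free (no more cleanup_list_lock), and llist_add() returns true only when it made the list non-empty, so exactly one producer per batch queues the cleanup work. A minimal, compilable model of that idiom using C11 atomics:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    /* Lock-free push onto a singly linked stack; the return value says
     * whether the list was empty, mirroring llist_add() above. */
    struct lnode {
            struct lnode *next;
    };

    static _Atomic(struct lnode *) head;

    static bool lockless_push(struct lnode *n)
    {
            struct lnode *old = atomic_load(&head);

            do {
                    n->next = old;
            } while (!atomic_compare_exchange_weak(&head, &old, n));
            return old == NULL;     /* true: caller queues the worker */
    }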
@@ -633,6 +653,7 @@ static __net_exit void net_ns_net_exit(struct net *net)
 static struct pernet_operations __net_initdata net_ns_ops = {
 	.init = net_ns_net_init,
 	.exit = net_ns_net_exit,
+	.async = true,
 };
 
 static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
@@ -875,17 +896,12 @@ static int __init net_ns_init(void)
 
 	rcu_assign_pointer(init_net.gen, ng);
 
-	mutex_lock(&net_mutex);
+	down_write(&net_sem);
 	if (setup_net(&init_net, &init_user_ns))
 		panic("Could not setup the initial network namespace");
 
 	init_net_initialized = true;
-
-	rtnl_lock();
-	list_add_tail_rcu(&init_net.list, &net_namespace_list);
-	rtnl_unlock();
-
-	mutex_unlock(&net_mutex);
+	up_write(&net_sem);
 
 	register_pernet_subsys(&net_ns_ops);
 
@@ -989,6 +1005,9 @@ again:
 		rcu_barrier();
 		if (ops->id)
 			ida_remove(&net_generic_ids, *ops->id);
+	} else if (!ops->async) {
+		pr_info_once("Pernet operations %ps are sync.\n", ops);
+		nr_sync_pernet_ops++;
 	}
 
 	return error;
@@ -996,7 +1015,8 @@ again:
 
 static void unregister_pernet_operations(struct pernet_operations *ops)
 {
-
+	if (!ops->async)
+		BUG_ON(nr_sync_pernet_ops-- == 0);
 	__unregister_pernet_operations(ops);
 	rcu_barrier();
 	if (ops->id)
@@ -1025,9 +1045,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
 int register_pernet_subsys(struct pernet_operations *ops)
 {
 	int error;
-	mutex_lock(&net_mutex);
+	down_write(&net_sem);
 	error = register_pernet_operations(first_device, ops);
-	mutex_unlock(&net_mutex);
+	up_write(&net_sem);
 	return error;
 }
 EXPORT_SYMBOL_GPL(register_pernet_subsys);
@@ -1043,9 +1063,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
  */
 void unregister_pernet_subsys(struct pernet_operations *ops)
 {
-	mutex_lock(&net_mutex);
+	down_write(&net_sem);
 	unregister_pernet_operations(ops);
-	mutex_unlock(&net_mutex);
+	up_write(&net_sem);
 }
 EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
1051 1071
@@ -1071,11 +1091,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
 int register_pernet_device(struct pernet_operations *ops)
 {
 	int error;
-	mutex_lock(&net_mutex);
+	down_write(&net_sem);
 	error = register_pernet_operations(&pernet_list, ops);
 	if (!error && (first_device == &pernet_list))
 		first_device = &ops->list;
-	mutex_unlock(&net_mutex);
+	up_write(&net_sem);
 	return error;
 }
 EXPORT_SYMBOL_GPL(register_pernet_device);
@@ -1091,11 +1111,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
  */
 void unregister_pernet_device(struct pernet_operations *ops)
 {
-	mutex_lock(&net_mutex);
+	down_write(&net_sem);
 	if (&ops->list == first_device)
 		first_device = first_device->next;
 	unregister_pernet_operations(ops);
-	mutex_unlock(&net_mutex);
+	up_write(&net_sem);
 }
 EXPORT_SYMBOL_GPL(unregister_pernet_device);
 
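The core trick of this file's conversion is the opportunistic read lock: namespace setup and cleanup take net_sem for reading when nr_sync_pernet_ops is zero, then re-check the counter after acquiring it, since a synchronous op may have registered in the window. A pthread model of that retry dance, assumptions in the comments:

    #include <pthread.h>
    #include <stdatomic.h>

    static pthread_rwlock_t sem = PTHREAD_RWLOCK_INITIALIZER;
    static atomic_uint nr_sync_ops; /* ops that still demand exclusion */

    /* Returns 1 if the write side was taken, 0 for the read side.
     * Write mode is always safe; read mode must be revalidated. */
    static int lock_for_ns_work(void)
    {
            for (;;) {
                    if (atomic_load(&nr_sync_ops)) {
                            pthread_rwlock_wrlock(&sem);
                            return 1;
                    }
                    pthread_rwlock_rdlock(&sem);
                    if (!atomic_load(&nr_sync_ops))
                            return 0;       /* fast path confirmed */
                    pthread_rwlock_unlock(&sem);    /* lost the race */
            }
    }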
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bc290413a49d..67f375cfb982 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -454,11 +454,11 @@ static void rtnl_lock_unregistering_all(void)
 void rtnl_link_unregister(struct rtnl_link_ops *ops)
 {
 	/* Close the race with cleanup_net() */
-	mutex_lock(&net_mutex);
+	down_write(&net_sem);
 	rtnl_lock_unregistering_all();
 	__rtnl_link_unregister(ops);
 	rtnl_unlock();
-	mutex_unlock(&net_mutex);
+	up_write(&net_sem);
 }
 EXPORT_SYMBOL_GPL(rtnl_link_unregister);
 
@@ -4724,6 +4724,7 @@ static void __net_exit rtnetlink_net_exit(struct net *net)
 static struct pernet_operations rtnetlink_net_ops = {
 	.init = rtnetlink_net_init,
 	.exit = rtnetlink_net_exit,
+	.async = true,
 };
 
 void __init rtnetlink_init(void)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 09bd89c90a71..1a7485a2cdfa 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -890,7 +890,7 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
-static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
 {
 	unsigned long max_pg, num_pg, new_pg, old_pg;
 	struct user_struct *user;
@@ -919,14 +919,16 @@ static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
 
-static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+void mm_unaccount_pinned_pages(struct mmpin *mmp)
 {
 	if (mmp->user) {
 		atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
 		free_uid(mmp->user);
 	}
 }
+EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
 
 struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
 {
diff --git a/net/core/sock.c b/net/core/sock.c
index c501499a04fe..507d8c6c4319 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1049,18 +1049,21 @@ set_rcvbuf:
 		break;
 
 	case SO_ZEROCOPY:
-		if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
+			if (sk->sk_protocol != IPPROTO_TCP)
+				ret = -ENOTSUPP;
+			else if (sk->sk_state != TCP_CLOSE)
+				ret = -EBUSY;
+		} else if (sk->sk_family != PF_RDS) {
 			ret = -ENOTSUPP;
-		else if (sk->sk_protocol != IPPROTO_TCP)
-			ret = -ENOTSUPP;
-		else if (sk->sk_state != TCP_CLOSE)
-			ret = -EBUSY;
-		else if (val < 0 || val > 1)
-			ret = -EINVAL;
-		else
-			sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
-		break;
-
+		}
+		if (!ret) {
+			if (val < 0 || val > 1)
+				ret = -EINVAL;
+			else
+				sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+			break;
+		}
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1274,7 +1277,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 	{
 		char address[128];
 
-		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+		lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
+		if (lv < 0)
 			return -ENOTCONN;
 		if (lv < len)
 			return -EINVAL;
@@ -1773,7 +1777,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 	u32 max_segs = 1;
 
 	sk_dst_set(sk, dst);
-	sk->sk_route_caps = dst->dev->features;
+	sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
 	if (sk->sk_route_caps & NETIF_F_GSO)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
 	sk->sk_route_caps &= ~sk->sk_route_nocaps;
@@ -2497,7 +2501,7 @@ int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
 EXPORT_SYMBOL(sock_no_accept);
 
 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
-		    int *len, int peer)
+		    int peer)
 {
 	return -EOPNOTSUPP;
 }
@@ -3111,6 +3115,7 @@ static void __net_exit sock_inuse_exit_net(struct net *net)
 static struct pernet_operations net_inuse_ops = {
 	.init = sock_inuse_init_net,
 	.exit = sock_inuse_exit_net,
+	.async = true,
 };
 
 static __init int net_inuse_init(void)
@@ -3384,6 +3389,7 @@ static __net_exit void proto_exit_net(struct net *net)
 static __net_initdata struct pernet_operations proto_net_ops = {
 	.init = proto_init_net,
 	.exit = proto_exit_net,
+	.async = true,
 };
 
 static int __init proto_init(void)
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 146b50e30659..aee5642affd9 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -328,6 +328,7 @@ static void __net_exit diag_net_exit(struct net *net)
 static struct pernet_operations diag_net_ops = {
 	.init = diag_net_init,
 	.exit = diag_net_exit,
+	.async = true,
 };
 
 static int __init sock_diag_init(void)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index f2d0462611c3..d714f65782b7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -572,6 +572,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net)
 static __net_initdata struct pernet_operations sysctl_core_ops = {
 	.init = sysctl_core_net_init,
 	.exit = sysctl_core_net_exit,
+	.async = true,
 };
 
 static __init int sysctl_core_init(void)
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 91dd09f79808..2ee8306c23e3 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1180,14 +1180,12 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
 }
 
 
-static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len,int peer)
+static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
 {
 	struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
 	struct sock *sk = sock->sk;
 	struct dn_scp *scp = DN_SK(sk);
 
-	*uaddr_len = sizeof(struct sockaddr_dn);
-
 	lock_sock(sk);
 
 	if (peer) {
@@ -1205,7 +1203,7 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len
 
 	release_sock(sk);
 
-	return 0;
+	return sizeof(struct sockaddr_dn);
 }
 
 
@@ -1338,6 +1336,12 @@ static int dn_setsockopt(struct socket *sock, int level, int optname, char __use
 	lock_sock(sk);
 	err = __dn_setsockopt(sock, level, optname, optval, optlen, 0);
 	release_sock(sk);
+#ifdef CONFIG_NETFILTER
+	/* we need to exclude all possible ENOPROTOOPTs except default case */
+	if (err == -ENOPROTOOPT && optname != DSO_LINKINFO &&
+	    optname != DSO_STREAM && optname != DSO_SEQPACKET)
+		err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
+#endif
 
 	return err;
 }
@@ -1445,15 +1449,6 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
 		dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation);
 		break;
 
-	default:
-#ifdef CONFIG_NETFILTER
-		return nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
-#endif
-	case DSO_LINKINFO:
-	case DSO_STREAM:
-	case DSO_SEQPACKET:
-		return -ENOPROTOOPT;
-
 	case DSO_MAXWINDOW:
 		if (optlen != sizeof(unsigned long))
 			return -EINVAL;
@@ -1501,6 +1496,12 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
 			return -EINVAL;
 		scp->info_loc = u.info;
 		break;
+
+	case DSO_LINKINFO:
+	case DSO_STREAM:
+	case DSO_SEQPACKET:
+	default:
+		return -ENOPROTOOPT;
 	}
 
 	return 0;
@@ -1514,6 +1515,20 @@ static int dn_getsockopt(struct socket *sock, int level, int optname, char __use
 	lock_sock(sk);
 	err = __dn_getsockopt(sock, level, optname, optval, optlen, 0);
 	release_sock(sk);
+#ifdef CONFIG_NETFILTER
+	if (err == -ENOPROTOOPT && optname != DSO_STREAM &&
+	    optname != DSO_SEQPACKET && optname != DSO_CONACCEPT &&
+	    optname != DSO_CONREJECT) {
+		int len;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		err = nf_getsockopt(sk, PF_DECnet, optname, optval, &len);
+		if (err >= 0)
+			err = put_user(len, optlen);
+	}
+#endif
 
 	return err;
 }
@@ -1579,26 +1594,6 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
 		r_data = &link;
 		break;
 
-	default:
-#ifdef CONFIG_NETFILTER
-	{
-		int ret, len;
-
-		if (get_user(len, optlen))
-			return -EFAULT;
-
-		ret = nf_getsockopt(sk, PF_DECnet, optname, optval, &len);
-		if (ret >= 0)
-			ret = put_user(len, optlen);
-		return ret;
-	}
-#endif
-	case DSO_STREAM:
-	case DSO_SEQPACKET:
-	case DSO_CONACCEPT:
-	case DSO_CONREJECT:
-		return -ENOPROTOOPT;
-
 	case DSO_MAXWINDOW:
 		if (r_len > sizeof(unsigned long))
 			r_len = sizeof(unsigned long);
@@ -1630,6 +1625,13 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
 		r_len = sizeof(unsigned char);
 		r_data = &scp->info_rem;
 		break;
+
+	case DSO_STREAM:
+	case DSO_SEQPACKET:
+	case DSO_CONACCEPT:
+	case DSO_CONREJECT:
+	default:
+		return -ENOPROTOOPT;
 	}
 
 	if (r_data) {
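The DECnet restructure above moves the netfilter fallback out of the switch statements: the protocol handler runs first (under the socket lock), and only an -ENOPROTOOPT result for an option the protocol does not reserve is forwarded to nf_setsockopt()/nf_getsockopt() after the lock is dropped. The control flow as a standalone function; every name here is illustrative:

    #include <errno.h>

    /* Try the protocol's own handler first; fall back to the secondary
     * handler only for options the protocol declares unknown. */
    static int set_option(int optname, int reserved_opt,
                          int (*proto_handler)(int),
                          int (*fallback_handler)(int))
    {
            int err = proto_handler(optname);

            if (err == -ENOPROTOOPT && optname != reserved_opt)
                    err = fallback_handler(optname);
            return err;
    }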
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 6a9d0f50fbee..e63c554e0623 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -23,6 +23,7 @@
 #include <linux/netdevice.h>
 #include <linux/sysfs.h>
 #include <linux/phy_fixed.h>
+#include <linux/ptp_classify.h>
 #include <linux/gpio/consumer.h>
 #include <linux/etherdevice.h>
 
@@ -122,6 +123,38 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
 
+/* Determine if we should defer delivery of skb until we have a rx timestamp.
+ *
+ * Called from dsa_switch_rcv. For now, this will only work if tagging is
+ * enabled on the switch. Normally the MAC driver would retrieve the hardware
+ * timestamp when it reads the packet out of the hardware. However in a DSA
+ * switch, the DSA driver owning the interface to which the packet is
+ * delivered is never notified unless we do so here.
+ */
+static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p,
+				       struct sk_buff *skb)
+{
+	struct dsa_switch *ds = p->dp->ds;
+	unsigned int type;
+
+	if (skb_headroom(skb) < ETH_HLEN)
+		return false;
+
+	__skb_push(skb, ETH_HLEN);
+
+	type = ptp_classify_raw(skb);
+
+	__skb_pull(skb, ETH_HLEN);
+
+	if (type == PTP_CLASS_NONE)
+		return false;
+
+	if (likely(ds->ops->port_rxtstamp))
+		return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type);
+
+	return false;
+}
+
 static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 			  struct packet_type *pt, struct net_device *unused)
 {
@@ -157,6 +190,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 	s->rx_bytes += skb->len;
 	u64_stats_update_end(&s->syncp);
 
+	if (dsa_skb_defer_rx_timestamp(p, skb))
+		return 0;
+
 	netif_receive_skb(skb);
 
 	return 0;
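The __skb_push()/__skb_pull() pair in the new helper exists because by rx time the MAC header has already been pulled, while the classifier expects the frame to start at the Ethernet header. ptp_classify_raw() itself is a BPF classifier that also matches PTP over UDP/IPv4/IPv6; a toy stand-in that only recognizes layer-2 PTP frames (EtherType 0x88F7) illustrates why the header must be in front:

    #include <stddef.h>

    #define ETH_HLEN 14

    /* Toy classifier (assumption: layer-2 PTP only): 'frame' must point
     * at the MAC header, which is why the rx path pushes it back first. */
    static int is_l2_ptp_frame(const unsigned char *frame, size_t len)
    {
            return len >= ETH_HLEN && frame[12] == 0x88 && frame[13] == 0xF7;
    }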
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f52307296de4..3376dad6dcfd 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -21,6 +21,7 @@
 #include <net/tc_act/tc_mirred.h>
 #include <linux/if_bridge.h>
 #include <linux/netpoll.h>
+#include <linux/ptp_classify.h>
 
 #include "dsa_priv.h"
 
@@ -255,6 +256,22 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->dp->ds;
+	int port = p->dp->index;
+
+	/* Pass through to switch driver if it supports timestamping */
+	switch (cmd) {
+	case SIOCGHWTSTAMP:
+		if (ds->ops->port_hwtstamp_get)
+			return ds->ops->port_hwtstamp_get(ds, port, ifr);
+		break;
+	case SIOCSHWTSTAMP:
+		if (ds->ops->port_hwtstamp_set)
+			return ds->ops->port_hwtstamp_set(ds, port, ifr);
+		break;
+	}
+
 	if (!dev->phydev)
 		return -ENODEV;
 
@@ -385,6 +402,30 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
 	return NETDEV_TX_OK;
 }
 
+static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
+				 struct sk_buff *skb)
+{
+	struct dsa_switch *ds = p->dp->ds;
+	struct sk_buff *clone;
+	unsigned int type;
+
+	type = ptp_classify_raw(skb);
+	if (type == PTP_CLASS_NONE)
+		return;
+
+	if (!ds->ops->port_txtstamp)
+		return;
+
+	clone = skb_clone_sk(skb);
+	if (!clone)
+		return;
+
+	if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type))
+		return;
+
+	kfree_skb(clone);
+}
+
 static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -397,6 +438,11 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 	s->tx_bytes += skb->len;
 	u64_stats_update_end(&s->syncp);
 
+	/* Identify PTP protocol packets, clone them, and pass them to the
+	 * switch driver
+	 */
+	dsa_skb_tx_timestamp(p, skb);
+
 	/* Transmit function may have to reallocate the original SKB,
 	 * in which case it must have freed it. Only free it here on error.
 	 */
@@ -918,6 +964,18 @@ static int dsa_slave_set_rxnfc(struct net_device *dev,
 	return ds->ops->set_rxnfc(ds, dp->index, nfc);
 }
 
+static int dsa_slave_get_ts_info(struct net_device *dev,
+				 struct ethtool_ts_info *ts)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->dp->ds;
+
+	if (!ds->ops->get_ts_info)
+		return -EOPNOTSUPP;
+
+	return ds->ops->get_ts_info(ds, p->dp->index, ts);
+}
+
 static const struct ethtool_ops dsa_slave_ethtool_ops = {
 	.get_drvinfo = dsa_slave_get_drvinfo,
 	.get_regs_len = dsa_slave_get_regs_len,
@@ -938,6 +996,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
 	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 	.get_rxnfc = dsa_slave_get_rxnfc,
 	.set_rxnfc = dsa_slave_set_rxnfc,
+	.get_ts_info = dsa_slave_get_ts_info,
 };
 
 /* legacy way, bypassing the bridge *****************************************/
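The tx path above follows a strict ownership rule: the clone is handed to the driver only if port_txtstamp() accepts it, otherwise (or if cloning fails) the caller frees it, so exactly one side ever owns the buffer. The rule in miniature, with malloc() standing in for skb_clone_sk() and all names invented for illustration:

    #include <stdbool.h>
    #include <stdlib.h>

    /* Consumer returns true to take ownership of the clone; on false,
     * or when cloning fails, the producer remains responsible for it. */
    static void timestamp_handoff(bool (*consume)(void *clone))
    {
            void *clone = malloc(64);       /* stand-in for skb_clone_sk() */

            if (!clone)
                    return;
            if (consume(clone))
                    return;         /* consumer completes and frees it */
            free(clone);            /* declined: release it here      */
    }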
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e4329e161943..e8c7fad8c329 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -723,7 +723,7 @@ EXPORT_SYMBOL(inet_accept);
  * This does both peername and sockname.
  */
 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
-		 int *uaddr_len, int peer)
+		 int peer)
 {
 	struct sock *sk = sock->sk;
 	struct inet_sock *inet = inet_sk(sk);
@@ -745,8 +745,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 		sin->sin_addr.s_addr = addr;
 	}
 	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-	*uaddr_len = sizeof(*sin);
-	return 0;
+	return sizeof(*sin);
 }
 EXPORT_SYMBOL(inet_getname);
 
@@ -1736,6 +1735,7 @@ static __net_exit void ipv4_mib_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ipv4_mib_ops = {
 	.init = ipv4_mib_init_net,
 	.exit = ipv4_mib_exit_net,
+	.async = true,
 };
 
 static int __init init_ipv4_mibs(void)
@@ -1789,6 +1789,7 @@ static __net_exit void inet_exit_net(struct net *net)
 static __net_initdata struct pernet_operations af_inet_ops = {
 	.init = inet_init_net,
 	.exit = inet_exit_net,
+	.async = true,
 };
 
 static int __init init_inet_pernet_ops(void)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f28f06c91ead..7dc9de8444a9 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1447,6 +1447,7 @@ static void __net_exit arp_net_exit(struct net *net)
 static struct pernet_operations arp_net_ops = {
 	.init = arp_net_init,
 	.exit = arp_net_exit,
+	.async = true,
 };
 
 static int __init arp_proc_init(void)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 40f001782c1b..5ae0d1f097ca 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2469,6 +2469,7 @@ static __net_exit void devinet_exit_net(struct net *net)
 static __net_initdata struct pernet_operations devinet_ops = {
 	.init = devinet_init_net,
 	.exit = devinet_exit_net,
+	.async = true,
 };
 
 static struct rtnl_af_ops inet_af_ops __read_mostly = {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f05afaf3235c..ac71c3d496c0 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1362,6 +1362,7 @@ static void __net_exit fib_net_exit(struct net *net)
 static struct pernet_operations fib_net_ops = {
 	.init = fib_net_init,
 	.exit = fib_net_exit,
+	.async = true,
 };
 
 void __init ip_fib_init(void)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c586597da20d..cd46d7666598 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -646,6 +646,11 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
 			    fi->fib_nh, cfg, extack))
 			return 1;
 	}
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	if (cfg->fc_flow &&
+	    cfg->fc_flow != fi->fib_nh->nh_tclassid)
+		return 1;
+#endif
 	if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
 	    (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
 		return 0;
@@ -1760,13 +1765,11 @@ void fib_select_multipath(struct fib_result *res, int hash)
 void fib_select_path(struct net *net, struct fib_result *res,
 		     struct flowi4 *fl4, const struct sk_buff *skb)
 {
-	bool oif_check;
-
-	oif_check = (fl4->flowi4_oif == 0 ||
-		     fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF);
+	if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
+		goto check_saddr;
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (res->fi->fib_nhs > 1 && oif_check) {
+	if (res->fi->fib_nhs > 1) {
 		int h = fib_multipath_hash(res->fi, fl4, skb);
 
 		fib_select_multipath(res, h);
@@ -1775,10 +1778,10 @@ void fib_select_path(struct net *net, struct fib_result *res,
 #endif
 	if (!res->prefixlen &&
 	    res->table->tb_num_default > 1 &&
-	    res->type == RTN_UNICAST && oif_check)
+	    res->type == RTN_UNICAST)
 		fib_select_default(fl4, res);
 
+check_saddr:
 	if (!fl4->saddr)
 		fl4->saddr = FIB_RES_PREFSRC(net, *res);
 }
-EXPORT_SYMBOL_GPL(fib_select_path);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 1617604c9284..cc56efa64d5c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1257,6 +1257,7 @@ fail:
 static struct pernet_operations __net_initdata icmp_sk_ops = {
 	.init = icmp_sk_init,
 	.exit = icmp_sk_exit,
+	.async = true,
 };
 
 int __init icmp_init(void)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f2402581fef1..c2743763777e 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -3028,6 +3028,7 @@ static void __net_exit igmp_net_exit(struct net *net)
 static struct pernet_operations igmp_net_ops = {
 	.init = igmp_net_init,
 	.exit = igmp_net_exit,
+	.async = true,
 };
 #endif
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bbf1b94942c0..5e843ae5e468 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -885,6 +885,7 @@ static void __net_exit ipv4_frags_exit_net(struct net *net)
 static struct pernet_operations ip4_frags_ops = {
 	.init = ipv4_frags_init_net,
 	.exit = ipv4_frags_exit_net,
+	.async = true,
 };
 
 void __init ipfrag_init(void)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 008be04ac1cc..9dca0fb8c482 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -258,7 +258,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 			src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
 			if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
 				return -EINVAL;
-			ipc->oif = src_info->ipi6_ifindex;
+			if (src_info->ipi6_ifindex)
+				ipc->oif = src_info->ipi6_ifindex;
 			ipc->addr = src_info->ipi6_addr.s6_addr32[3];
 			continue;
 		}
@@ -288,7 +289,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
 				return -EINVAL;
 			info = (struct in_pktinfo *)CMSG_DATA(cmsg);
-			ipc->oif = info->ipi_ifindex;
+			if (info->ipi_ifindex)
+				ipc->oif = info->ipi_ifindex;
 			ipc->addr = info->ipi_spec_dst.s_addr;
 			break;
 		}
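Both hunks enforce the same rule: an ifindex of 0 in an IP_PKTINFO (or mapped IPv6) control message means "unspecified", so it must no longer clobber an interface the socket has already selected. As a pure function, with illustrative names:

    /* Keep the previously chosen interface when the cmsg carries 0. */
    static int effective_oif(int current_oif, int cmsg_ifindex)
    {
            return cmsg_ifindex ? cmsg_ifindex : current_oif;
    }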
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b05689bbba31..7c7ac9d32e77 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -3327,6 +3327,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
 static struct pernet_operations ipmr_net_ops = {
 	.init = ipmr_net_init,
 	.exit = ipmr_net_exit,
+	.async = true,
 };
 
 int __init ip_mr_init(void)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 9a71f3149507..39a7cf9160e6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1911,6 +1911,7 @@ static void __net_exit ip_tables_net_exit(struct net *net)
 static struct pernet_operations ip_tables_net_ops = {
 	.init = ip_tables_net_init,
 	.exit = ip_tables_net_exit,
+	.async = true,
 };
 
 static int __init ip_tables_init(void)
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 9ac92ea7b93c..c1c136a93911 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -87,6 +87,7 @@ static void __net_exit iptable_filter_net_exit(struct net *net)
 static struct pernet_operations iptable_filter_net_ops = {
 	.init = iptable_filter_net_init,
 	.exit = iptable_filter_net_exit,
+	.async = true,
 };
 
 static int __init iptable_filter_init(void)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index b8f0db54b197..0164def9c808 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1204,6 +1204,7 @@ static void __net_exit ping_v4_proc_exit_net(struct net *net)
 static struct pernet_operations ping_v4_net_ops = {
 	.init = ping_v4_proc_init_net,
 	.exit = ping_v4_proc_exit_net,
+	.async = true,
 };
 
 int __init ping_proc_init(void)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index dc5edc8f7564..fdabc70283b6 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -549,6 +549,7 @@ static __net_exit void ip_proc_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ip_proc_ops = {
 	.init = ip_proc_init_net,
 	.exit = ip_proc_exit_net,
+	.async = true,
 };
 
 int __init ip_misc_proc_init(void)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9b367fc48d7d..54648d20bf0f 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1156,6 +1156,7 @@ static __net_exit void raw_exit_net(struct net *net)
 static __net_initdata struct pernet_operations raw_net_ops = {
 	.init = raw_init_net,
 	.exit = raw_exit_net,
+	.async = true,
 };
 
 int __init raw_proc_init(void)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 49cc1c1df1ba..5ca7415cd48c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -417,6 +417,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
 static struct pernet_operations ip_rt_proc_ops __net_initdata = {
 	.init = ip_rt_do_proc_init,
 	.exit = ip_rt_do_proc_exit,
+	.async = true,
 };
 
 static int __init ip_rt_proc_init(void)
@@ -1508,7 +1509,6 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 	rt->rt_pmtu = 0;
 	rt->rt_gateway = 0;
 	rt->rt_uses_gateway = 0;
-	rt->rt_table_id = 0;
 	INIT_LIST_HEAD(&rt->rt_uncached);
 
 	rt->dst.output = ip_output;
@@ -1644,19 +1644,6 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
 	spin_unlock_bh(&fnhe_lock);
 }
 
-static void set_lwt_redirect(struct rtable *rth)
-{
-	if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
-		rth->dst.lwtstate->orig_output = rth->dst.output;
-		rth->dst.output = lwtunnel_output;
-	}
-
-	if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
-		rth->dst.lwtstate->orig_input = rth->dst.input;
-		rth->dst.input = lwtunnel_input;
-	}
-}
-
 /* called in rcu_read_lock() section */
 static int __mkroute_input(struct sk_buff *skb,
 			   const struct fib_result *res,
@@ -1739,15 +1726,13 @@ rt_cache:
 	}
 
 	rth->rt_is_input = 1;
-	if (res->table)
-		rth->rt_table_id = res->table->tb_id;
 	RT_CACHE_STAT_INC(in_slow_tot);
 
 	rth->dst.input = ip_forward;
 
 	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
 		       do_cache);
-	set_lwt_redirect(rth);
+	lwtunnel_set_redirect(&rth->dst);
 	skb_dst_set(skb, &rth->dst);
 out:
 	err = 0;
@@ -1846,7 +1831,6 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
 
 	return mhash >> 1;
 }
-EXPORT_SYMBOL_GPL(fib_multipath_hash);
 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
 
 static int ip_mkroute_input(struct sk_buff *skb,
@@ -2014,8 +1998,6 @@ local_input:
 	rth->dst.tclassid = itag;
 #endif
 	rth->rt_is_input = 1;
-	if (res->table)
-		rth->rt_table_id = res->table->tb_id;
 
 	RT_CACHE_STAT_INC(in_slow_tot);
 	if (res->type == RTN_UNREACHABLE) {
@@ -2244,8 +2226,6 @@ add:
 		return ERR_PTR(-ENOBUFS);
 
 	rth->rt_iif = orig_oif;
-	if (res->table)
-		rth->rt_table_id = res->table->tb_id;
 
 	RT_CACHE_STAT_INC(out_slow_tot);
 
@@ -2267,7 +2247,7 @@ add:
 	}
 
 	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
-	set_lwt_redirect(rth);
+	lwtunnel_set_redirect(&rth->dst);
 
 	return rth;
 }
@@ -2775,7 +2755,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		rt->rt_flags |= RTCF_NOTIFY;
 
 	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
-		table_id = rt->rt_table_id;
+		table_id = res.table ? res.table->tb_id : 0;
 
 	if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
 		if (!res.fi) {
@@ -2994,6 +2974,7 @@ static __net_exit void sysctl_route_net_exit(struct net *net)
 static __net_initdata struct pernet_operations sysctl_route_ops = {
 	.init = sysctl_route_net_init,
 	.exit = sysctl_route_net_exit,
+	.async = true,
 };
 #endif
 
@@ -3007,6 +2988,7 @@ static __net_init int rt_genid_init(struct net *net)
 
 static __net_initdata struct pernet_operations rt_genid_ops = {
 	.init = rt_genid_init,
+	.async = true,
 };
 
 static int __net_init ipv4_inetpeer_init(struct net *net)
@@ -3032,6 +3014,7 @@ static void __net_exit ipv4_inetpeer_exit(struct net *net)
 static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
 	.init = ipv4_inetpeer_init,
 	.exit = ipv4_inetpeer_exit,
+	.async = true,
 };
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
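
Both copies of set_lwt_redirect() (here and in net/ipv6/route.c further down) are replaced by a single lwtunnel_set_redirect() call. Judging from the two removed bodies, the shared helper most likely looks like the sketch below; the real definition lives with the lwtunnel core, not in this file:

static void lwtunnel_set_redirect(struct dst_entry *dst)
{
	if (lwtunnel_output_redirect(dst->lwtstate)) {
		/* divert output through the light-weight tunnel */
		dst->lwtstate->orig_output = dst->output;
		dst->output = lwtunnel_output;
	}
	if (lwtunnel_input_redirect(dst->lwtstate)) {
		/* divert input the same way */
		dst->lwtstate->orig_input = dst->input;
		dst->input = lwtunnel_input;
	}
}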
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 93e172118a94..89683d868b37 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1219,6 +1219,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
 	.init = ipv4_sysctl_init_net,
 	.exit = ipv4_sysctl_exit_net,
+	.async = true,
 };
 
 static __init int sysctl_ipv4_init(void)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 48636aee23c3..a33539798bf6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -453,6 +453,7 @@ void tcp_init_sock(struct sock *sk)
 	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
 
 	sk_sockets_allocated_inc(sk);
+	sk->sk_route_forced_caps = NETIF_F_GSO;
 }
 EXPORT_SYMBOL(tcp_init_sock);
 
@@ -897,7 +898,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 new_size_goal, size_goal;
 
-	if (!large_allowed || !sk_can_gso(sk))
+	if (!large_allowed)
 		return mss_now;
 
 	/* Note : tcp_tso_autosize() will eventually split this later */
@@ -1062,8 +1063,7 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages);
 int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
 			size_t size, int flags)
 {
-	if (!(sk->sk_route_caps & NETIF_F_SG) ||
-	    !sk_check_csum_caps(sk))
+	if (!(sk->sk_route_caps & NETIF_F_SG))
 		return sock_no_sendpage_locked(sk, page, offset, size, flags);
 
 	tcp_rate_check_app_limited(sk); /* is sending application-limited? */
@@ -1102,27 +1102,11 @@ static int linear_payload_sz(bool first_skb)
 	return 0;
 }
 
-static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
+static int select_size(bool first_skb, bool zc)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
-	int tmp = tp->mss_cache;
-
-	if (sg) {
-		if (zc)
-			return 0;
-
-		if (sk_can_gso(sk)) {
-			tmp = linear_payload_sz(first_skb);
-		} else {
-			int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
-
-			if (tmp >= pgbreak &&
-			    tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
-				tmp = pgbreak;
-		}
-	}
-
-	return tmp;
+	if (zc)
+		return 0;
+	return linear_payload_sz(first_skb);
 }
 
 void tcp_free_fastopen_req(struct tcp_sock *tp)
@@ -1187,7 +1171,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 	int flags, err, copied = 0;
 	int mss_now = 0, size_goal, copied_syn = 0;
 	bool process_backlog = false;
-	bool sg, zc = false;
+	bool zc = false;
 	long timeo;
 
 	flags = msg->msg_flags;
@@ -1205,7 +1189,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			goto out_err;
 		}
 
-		zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+		zc = sk->sk_route_caps & NETIF_F_SG;
 		if (!zc)
 			uarg->zerocopy = 0;
 	}
@@ -1268,18 +1252,12 @@ restart:
 	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
 		goto do_error;
 
-	sg = !!(sk->sk_route_caps & NETIF_F_SG);
-
 	while (msg_data_left(msg)) {
 		int copy = 0;
-		int max = size_goal;
 
 		skb = tcp_write_queue_tail(sk);
-		if (skb) {
-			if (skb->ip_summed == CHECKSUM_NONE)
-				max = mss_now;
-			copy = max - skb->len;
-		}
+		if (skb)
+			copy = size_goal - skb->len;
 
 		if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
 			bool first_skb;
@@ -1297,22 +1275,17 @@ new_segment:
 				goto restart;
 			}
 			first_skb = tcp_rtx_and_write_queues_empty(sk);
-			linear = select_size(sk, sg, first_skb, zc);
+			linear = select_size(first_skb, zc);
 			skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
 						  first_skb);
 			if (!skb)
 				goto wait_for_memory;
 
 			process_backlog = true;
-			/*
-			 * Check whether we can use HW checksum.
-			 */
-			if (sk_check_csum_caps(sk))
-				skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->ip_summed = CHECKSUM_PARTIAL;
 
 			skb_entail(sk, skb);
 			copy = size_goal;
-			max = size_goal;
 
 			/* All packets are restored as if they have
 			 * already been sent. skb_mstamp isn't set to
@@ -1343,7 +1316,7 @@ new_segment:
 
 			if (!skb_can_coalesce(skb, i, pfrag->page,
 					      pfrag->offset)) {
-				if (i >= sysctl_max_skb_frags || !sg) {
+				if (i >= sysctl_max_skb_frags) {
 					tcp_mark_push(tp, skb);
 					goto new_segment;
 				}
@@ -1396,7 +1369,7 @@ new_segment:
 			goto out;
 		}
 
-		if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
+		if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
 			continue;
 
 		if (forced_push(tp)) {
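
The tcp.c changes above all follow from one decision: tcp_init_sock() now forces NETIF_F_GSO in sk_route_forced_caps, so sk_can_gso() is always true for TCP and every skb can be built as CHECKSUM_PARTIAL (software GSO computes checksums when the device cannot). A simplified sketch of how forced caps reach sk_route_caps, assuming the usual sk_setup_caps() flow; the real function handles more feature bits than shown:

static void sk_setup_caps_sketch(struct sock *sk, struct dst_entry *dst)
{
	sk->sk_route_caps = dst->dev->features;
	/* TCP sockets always get GSO, so the CHECKSUM_NONE fallbacks
	 * removed throughout this file become dead code.
	 */
	sk->sk_route_caps |= sk->sk_route_forced_caps;
}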
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 575d3c1fb6e8..06b9c4765f42 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1358,9 +1358,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 	int len;
 	int in_sack;
 
-	if (!sk_can_gso(sk))
-		goto fallback;
-
 	/* Normally R but no L won't result in plain S */
 	if (!dup_sack &&
 	    (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
@@ -5870,10 +5867,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	tp->rx_opt.saw_tstamp = 0;
 	req = tp->fastopen_rsk;
 	if (req) {
+		bool req_stolen;
+
 		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
 		    sk->sk_state != TCP_FIN_WAIT1);
 
-		if (!tcp_check_req(sk, skb, req, true))
+		if (!tcp_check_req(sk, skb, req, true, &req_stolen))
 			goto discard;
 	}
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f8ad397e285e..2c6aec2643e8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -561,16 +561,9 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 {
 	struct tcphdr *th = tcp_hdr(skb);
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct tcphdr, check);
-	} else {
-		th->check = tcp_v4_check(skb->len, saddr, daddr,
-					 csum_partial(th,
-						      th->doff << 2,
-						      skb->csum));
-	}
+	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
+	skb->csum_start = skb_transport_header(skb) - skb->head;
+	skb->csum_offset = offsetof(struct tcphdr, check);
 }
 
 /* This routine computes an IPv4 TCP checksum. */
@@ -1672,6 +1665,7 @@ process:
 
 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
 		struct request_sock *req = inet_reqsk(sk);
+		bool req_stolen = false;
 		struct sock *nsk;
 
 		sk = req->rsk_listener;
@@ -1694,10 +1688,20 @@ process:
 			th = (const struct tcphdr *)skb->data;
 			iph = ip_hdr(skb);
 			tcp_v4_fill_cb(skb, iph, th);
-			nsk = tcp_check_req(sk, skb, req, false);
+			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
 		}
 		if (!nsk) {
 			reqsk_put(req);
+			if (req_stolen) {
+				/* Another cpu got exclusive access to req
+				 * and created a full blown socket.
+				 * Try to feed this packet to this socket
+				 * instead of discarding it.
+				 */
+				tcp_v4_restore_cb(skb);
+				sock_put(sk);
+				goto lookup;
+			}
 			goto discard_and_relse;
 		}
 		if (nsk == sk) {
@@ -2387,6 +2391,7 @@ static void __net_exit tcp4_proc_exit_net(struct net *net)
 static struct pernet_operations tcp4_net_ops = {
 	.init = tcp4_proc_init_net,
 	.exit = tcp4_proc_exit_net,
+	.async = true,
 };
 
 int __init tcp4_proc_init(void)
@@ -2573,6 +2578,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
 	.init = tcp_sk_init,
 	.exit = tcp_sk_exit,
 	.exit_batch = tcp_sk_exit_batch,
+	.async = true,
 };
 
 void __init tcp_v4_init(void)
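
The req_stolen plumbing added here closes a race in the SYN_RECV fast path: another CPU can win the request socket and create the full child while this CPU is still validating the same packet. A condensed sketch of the retry pattern, folded into one hypothetical helper (the real code stays inline in tcp_v4_rcv()/tcp_v6_rcv()):

static struct sock *demux_new_syn_recv(struct sock *sk, struct sk_buff *skb,
				       struct request_sock *req)
{
	bool req_stolen = false;
	struct sock *nsk;

	nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
	if (nsk)
		return nsk;

	reqsk_put(req);
	if (req_stolen) {
		/* another CPU owns req and built the child socket;
		 * undo the cb mangling and retry the lookup
		 */
		tcp_v4_restore_cb(skb);
		sock_put(sk);
		return ERR_PTR(-EAGAIN);	/* caller: goto lookup */
	}
	return NULL;				/* caller: drop the packet */
}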
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 03b51cdcc731..aa6fea9f3328 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1024,6 +1024,7 @@ static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_lis
 static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
 	.init = tcp_net_metrics_init,
 	.exit_batch = tcp_net_metrics_exit_batch,
+	.async = true,
 };
 
 void __init tcp_metrics_init(void)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a8384b0c11f8..e7e36433cdb5 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -578,7 +578,7 @@ EXPORT_SYMBOL(tcp_create_openreq_child);
 
 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			   struct request_sock *req,
-			   bool fastopen)
+			   bool fastopen, bool *req_stolen)
 {
 	struct tcp_options_received tmp_opt;
 	struct sock *child;
@@ -785,6 +785,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
 	sock_rps_save_rxhash(child, skb);
 	tcp_synack_rtt_meas(child, req);
+	*req_stolen = !own_req;
 	return inet_csk_complete_hashdance(sk, child, req, own_req);
 
 listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e9f985e42405..8795d76f987c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1206,7 +1206,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 /* Initialize TSO segments for a packet. */
 static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
 {
-	if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
+	if (skb->len <= mss_now) {
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
 		 */
@@ -1335,21 +1335,9 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 	tcp_skb_fragment_eor(skb, buff);
 
-	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
-		/* Copy and checksum data tail into the new buffer. */
-		buff->csum = csum_partial_copy_nocheck(skb->data + len,
-						       skb_put(buff, nsize),
-						       nsize, 0);
-
-		skb_trim(skb, len);
-
-		skb->csum = csum_block_sub(skb->csum, buff->csum, len);
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb_split(skb, buff, len);
-	}
+	skb_split(skb, buff, len);
 
-	buff->ip_summed = skb->ip_summed;
+	buff->ip_summed = CHECKSUM_PARTIAL;
 
 	buff->tstamp = skb->tstamp;
 	tcp_fragment_tstamp(skb, buff);
@@ -1901,7 +1889,7 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 
 	tcp_skb_fragment_eor(skb, buff);
 
-	buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
+	buff->ip_summed = CHECKSUM_PARTIAL;
 	skb_split(skb, buff, len);
 	tcp_fragment_tstamp(skb, buff);
 
@@ -2027,6 +2015,24 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
 	}
 }
 
+static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
+{
+	struct sk_buff *skb, *next;
+
+	skb = tcp_send_head(sk);
+	tcp_for_write_queue_from_safe(skb, next, sk) {
+		if (len <= skb->len)
+			break;
+
+		if (unlikely(TCP_SKB_CB(skb)->eor))
+			return false;
+
+		len -= skb->len;
+	}
+
+	return true;
+}
+
 /* Create a new MTU probe if we are ready.
  * MTU probe is regularly attempting to increase the path MTU by
  * deliberately sending larger packets. This discovers routing
@@ -2099,6 +2105,9 @@ static int tcp_mtu_probe(struct sock *sk)
 		return 0;
 	}
 
+	if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
+		return -1;
+
 	/* We're allowed to probe. Build it now. */
 	nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
 	if (!nskb)
@@ -2113,7 +2122,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
 	TCP_SKB_CB(nskb)->sacked = 0;
 	nskb->csum = 0;
-	nskb->ip_summed = skb->ip_summed;
+	nskb->ip_summed = CHECKSUM_PARTIAL;
 
 	tcp_insert_write_queue_before(nskb, skb, sk);
 	tcp_highest_sack_replace(sk, skb, nskb);
@@ -2121,19 +2130,16 @@ static int tcp_mtu_probe(struct sock *sk)
 	len = 0;
 	tcp_for_write_queue_from_safe(skb, next, sk) {
 		copy = min_t(int, skb->len, probe_size - len);
-		if (nskb->ip_summed) {
-			skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
-		} else {
-			__wsum csum = skb_copy_and_csum_bits(skb, 0,
-							     skb_put(nskb, copy),
-							     copy, 0);
-			nskb->csum = csum_block_add(nskb->csum, csum, len);
-		}
+		skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
 
 		if (skb->len <= copy) {
 			/* We've eaten all the data from this skb.
 			 * Throw it away. */
 			TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+			/* If this is the last SKB we copy and eor is set
+			 * we need to propagate it to the new skb.
+			 */
+			TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
 			tcp_unlink_write_queue(skb, sk);
 			sk_wmem_free_skb(sk, skb);
 		} else {
@@ -2141,9 +2147,6 @@ static int tcp_mtu_probe(struct sock *sk)
 						   ~(TCPHDR_FIN|TCPHDR_PSH);
 			if (!skb_shinfo(skb)->nr_frags) {
 				skb_pull(skb, copy);
-				if (skb->ip_summed != CHECKSUM_PARTIAL)
-					skb->csum = csum_partial(skb->data,
-								 skb->len, 0);
 			} else {
 				__pskb_trim_head(skb, copy);
 				tcp_set_skb_tso_segs(skb, mss_now);
@@ -2721,12 +2724,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 	}
 	tcp_highest_sack_replace(sk, next_skb, skb);
 
-	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
-		skb->ip_summed = CHECKSUM_PARTIAL;
-
-	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
-
 	/* Update sequence range on original skb. */
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
 
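
tcp_mtu_probe() builds one large probe skb by coalescing the head of the write queue, which would be wrong across an skb that carries a record boundary; tcp_can_coalesce_send_queue_head() now refuses such probes, and the eor bit is propagated when the last coalesced skb is consumed. For context, a hypothetical user-space view of why the boundary matters:

#include <sys/socket.h>

/* MSG_EOR marks a record boundary; the kernel sets TCP_SKB_CB(skb)->eor
 * on the last skb of the record, and MTU probing must not merge data
 * across that mark.
 */
static void send_two_records(int fd, const void *a, size_t alen,
			     const void *b, size_t blen)
{
	send(fd, a, alen, MSG_EOR);	/* record A ends here */
	send(fd, b, blen, 0);		/* record B: never coalesced into A */
}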
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index bfaefe560b5c..3013404d0935 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2024,6 +2024,11 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
 		err = udplite_checksum_init(skb, uh);
 		if (err)
 			return err;
+
+		if (UDP_SKB_CB(skb)->partial_cov) {
+			skb->csum = inet_compute_pseudo(skb, proto);
+			return 0;
+		}
 	}
 
 	/* Note, we are only interested in != 0 or == 0, thus the
@@ -2757,6 +2762,7 @@ static void __net_exit udp4_proc_exit_net(struct net *net)
 static struct pernet_operations udp4_net_ops = {
 	.init = udp4_proc_init_net,
 	.exit = udp4_proc_exit_net,
+	.async = true,
 };
 
 int __init udp4_proc_init(void)
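
The udp4_csum_init() change handles UDP-Lite partial coverage: when the checksum covers only a prefix of the payload, the full-packet validation below cannot be used, so the new branch seeds skb->csum with the pseudo-header sum and returns early, leaving verification over the covered span to the later copy-and-check path. The same logic is mirrored for IPv6 in ip6_checksum.c further down. A sketch of the added branch in isolation, assuming the usual UDP_SKB_CB layout:

static int udplite_partial_cov_init(struct sk_buff *skb, int proto)
{
	if (UDP_SKB_CB(skb)->partial_cov) {
		/* checksum only spans the covered prefix; start from the
		 * pseudo-header and finish validation at copy time
		 */
		skb->csum = inet_compute_pseudo(skb, proto);
		return 0;
	}
	return 1;	/* full coverage: fall through to normal init */
}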
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index f96614e9b9a5..72f2c3806408 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -104,6 +104,7 @@ static void __net_exit udplite4_proc_exit_net(struct net *net)
 static struct pernet_operations udplite4_net_ops = {
 	.init = udplite4_proc_init_net,
 	.exit = udplite4_proc_exit_net,
+	.async = true,
 };
 
 static __init int udplite4_proc_init(void)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 05017e2c849c..796ac4115485 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -100,7 +100,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt.rt_gateway = rt->rt_gateway;
 	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
 	xdst->u.rt.rt_pmtu = rt->rt_pmtu;
-	xdst->u.rt.rt_table_id = rt->rt_table_id;
 	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
 
 	return 0;
@@ -365,6 +364,7 @@ static void __net_exit xfrm4_net_exit(struct net *net)
 static struct pernet_operations __net_initdata xfrm4_net_ops = {
 	.init = xfrm4_net_init,
 	.exit = xfrm4_net_exit,
+	.async = true,
 };
 
 static void __init xfrm4_policy_init(void)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1846b97ee69..4facfe0b1888 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4257,6 +4257,7 @@ static void __net_exit if6_proc_net_exit(struct net *net)
 static struct pernet_operations if6_proc_net_ops = {
 	.init = if6_proc_net_init,
 	.exit = if6_proc_net_exit,
+	.async = true,
 };
 
 int __init if6_proc_init(void)
@@ -6550,6 +6551,7 @@ static void __net_exit addrconf_exit_net(struct net *net)
 static struct pernet_operations addrconf_ops = {
 	.init = addrconf_init_net,
 	.exit = addrconf_exit_net,
+	.async = true,
 };
 
 static struct rtnl_af_ops inet6_ops __read_mostly = {
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 1d6ced37ad71..ba2e63633370 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -344,6 +344,7 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
 static struct pernet_operations ipv6_addr_label_ops = {
 	.init = ip6addrlbl_net_init,
 	.exit = ip6addrlbl_net_exit,
+	.async = true,
 };
 
 int __init ipv6_addr_label_init(void)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 416917719a6f..dbbe04018813 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -470,7 +470,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
  */
 
 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
-		  int *uaddr_len, int peer)
+		  int peer)
 {
 	struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
 	struct sock *sk = sock->sk;
@@ -500,8 +500,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 	}
 	sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
 						 sk->sk_bound_dev_if);
-	*uaddr_len = sizeof(*sin);
-	return 0;
+	return sizeof(*sin);
 }
 EXPORT_SYMBOL(inet6_getname);
 
@@ -858,6 +857,7 @@ static void __net_exit inet6_net_exit(struct net *net)
 static struct pernet_operations inet6_net_ops = {
 	.init = inet6_net_init,
 	.exit = inet6_net_exit,
+	.async = true,
 };
 
 static const struct ipv6_stub ipv6_stub_impl = {
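
inet6_getname() illustrates the tree-wide change to the getname() socket operation in this series: the int *uaddr_len out-parameter is gone, and implementations return the written address length (or a negative errno) directly. A minimal sketch of the new shape, using a hypothetical IPv4 handler:

static int example_getname(struct socket *sock, struct sockaddr *uaddr,
			   int peer)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;

	if (peer && sock->sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;	/* errors stay negative */

	sin->sin_family = AF_INET;
	/* ... fill in address and port from the socket ... */
	return sizeof(*sin);		/* success: the address length */
}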
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b240f24a6e52..95a2c9e8699a 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -368,6 +368,7 @@ static void __net_exit fib6_rules_net_exit(struct net *net)
 static struct pernet_operations fib6_rules_net_ops = {
 	.init = fib6_rules_net_init,
 	.exit = fib6_rules_net_exit,
+	.async = true,
 };
 
 int __init fib6_rules_init(void)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6ae5dd3f4d0d..4fa4f1b150a4 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -997,6 +997,7 @@ static void __net_exit icmpv6_sk_exit(struct net *net)
 static struct pernet_operations icmpv6_sk_ops = {
 	.init = icmpv6_sk_init,
 	.exit = icmpv6_sk_exit,
+	.async = true,
 };
 
 int __init icmpv6_init(void)
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index ec43d18b5ff9..547515e8450a 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -73,6 +73,11 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
 		err = udplite_checksum_init(skb, uh);
 		if (err)
 			return err;
+
+		if (UDP_SKB_CB(skb)->partial_cov) {
+			skb->csum = ip6_compute_pseudo(skb, proto);
+			return 0;
+		}
 	}
 
 	/* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 92b8d8c75eed..cab95cf3b39f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2160,6 +2160,7 @@ static void fib6_net_exit(struct net *net)
 static struct pernet_operations fib6_net_ops = {
 	.init = fib6_net_init,
 	.exit = fib6_net_exit,
+	.async = true,
 };
 
 int __init fib6_init(void)
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 3dab664ff503..6ddf52282894 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -873,6 +873,7 @@ static void __net_exit ip6_flowlabel_net_exit(struct net *net)
 static struct pernet_operations ip6_flowlabel_net_ops = {
 	.init = ip6_flowlabel_proc_init,
 	.exit = ip6_flowlabel_net_exit,
+	.async = true,
 };
 
 int ip6_flowlabel_init(void)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9f6cace9c817..295eb5ecaee5 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1397,6 +1397,7 @@ static void __net_exit ip6mr_net_exit(struct net *net)
 static struct pernet_operations ip6mr_net_ops = {
 	.init = ip6mr_net_init,
 	.exit = ip6mr_net_exit,
+	.async = true,
 };
 
 int __init ip6_mr_init(void)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9b9d2ff01b35..d9bb933dd5c4 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2997,6 +2997,7 @@ static void __net_exit igmp6_net_exit(struct net *net)
 static struct pernet_operations igmp6_net_ops = {
 	.init = igmp6_net_init,
 	.exit = igmp6_net_exit,
+	.async = true,
 };
 
 int __init igmp6_init(void)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f61a5b613b52..0a19ce3a6f7f 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1882,6 +1882,7 @@ static void __net_exit ndisc_net_exit(struct net *net)
 static struct pernet_operations ndisc_net_ops = {
 	.init = ndisc_net_init,
 	.exit = ndisc_net_exit,
+	.async = true,
 };
 
 int __init ndisc_init(void)
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index d12c55dad7d1..318c6e914234 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -240,6 +240,7 @@ static void __net_init ping_v6_proc_exit_net(struct net *net)
 static struct pernet_operations ping_v6_net_ops = {
 	.init = ping_v6_proc_init_net,
 	.exit = ping_v6_proc_exit_net,
+	.async = true,
 };
 #endif
 
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index b67814242f78..b8858c546f41 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -343,6 +343,7 @@ static void __net_exit ipv6_proc_exit_net(struct net *net)
 static struct pernet_operations ipv6_proc_ops = {
 	.init = ipv6_proc_init_net,
 	.exit = ipv6_proc_exit_net,
+	.async = true,
 };
 
 int __init ipv6_misc_proc_init(void)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4c25339b1984..10a4ac4933b7 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1332,6 +1332,7 @@ static void __net_exit raw6_exit_net(struct net *net)
 static struct pernet_operations raw6_net_ops = {
 	.init = raw6_init_net,
 	.exit = raw6_exit_net,
+	.async = true,
 };
 
 int __init raw6_proc_init(void)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index afbc000ad4f2..b5da69c83123 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -733,6 +733,7 @@ static void __net_exit ipv6_frags_exit_net(struct net *net)
 static struct pernet_operations ip6_frags_ops = {
 	.init = ipv6_frags_init_net,
 	.exit = ipv6_frags_exit_net,
+	.async = true,
 };
 
 int __init ipv6_frag_init(void)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9dcfadddd800..aa709b644945 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2671,14 +2671,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		if (err)
 			goto out;
 		rt->dst.lwtstate = lwtstate_get(lwtstate);
-		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
-			rt->dst.lwtstate->orig_output = rt->dst.output;
-			rt->dst.output = lwtunnel_output;
-		}
-		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
-			rt->dst.lwtstate->orig_input = rt->dst.input;
-			rt->dst.input = lwtunnel_input;
-		}
+		lwtunnel_set_redirect(&rt->dst);
 	}
 
 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
@@ -4979,6 +4972,7 @@ static void __net_exit ip6_route_net_exit_late(struct net *net)
 static struct pernet_operations ip6_route_net_ops = {
 	.init = ip6_route_net_init,
 	.exit = ip6_route_net_exit,
+	.async = true,
 };
 
 static int __net_init ipv6_inetpeer_init(struct net *net)
@@ -5004,11 +4998,13 @@ static void __net_exit ipv6_inetpeer_exit(struct net *net)
 static struct pernet_operations ipv6_inetpeer_ops = {
 	.init = ipv6_inetpeer_init,
 	.exit = ipv6_inetpeer_exit,
+	.async = true,
 };
 
 static struct pernet_operations ip6_route_net_late_ops = {
 	.init = ip6_route_net_init_late,
 	.exit = ip6_route_net_exit_late,
+	.async = true,
 };
 
 static struct notifier_block ip6_route_dev_notifier = {
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 7f5621d09571..c3f13c3bd8a9 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -395,6 +395,7 @@ static void __net_exit seg6_net_exit(struct net *net)
 static struct pernet_operations ip6_segments_ops = {
 	.init = seg6_net_init,
 	.exit = seg6_net_exit,
+	.async = true,
 };
 
 static const struct genl_ops seg6_genl_ops[] = {
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index a789a8ac6a64..262f791f1b9b 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -251,6 +251,7 @@ static void __net_exit ipv6_sysctl_net_exit(struct net *net)
 static struct pernet_operations ipv6_sysctl_net_ops = {
 	.init = ipv6_sysctl_net_init,
 	.exit = ipv6_sysctl_net_exit,
+	.async = true,
 };
 
 static struct ctl_table_header *ip6_header;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 412139f4eccd..5425d7b100ee 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1451,6 +1451,7 @@ process:
 
 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
 		struct request_sock *req = inet_reqsk(sk);
+		bool req_stolen = false;
 		struct sock *nsk;
 
 		sk = req->rsk_listener;
@@ -1470,10 +1471,20 @@ process:
 			th = (const struct tcphdr *)skb->data;
 			hdr = ipv6_hdr(skb);
 			tcp_v6_fill_cb(skb, hdr, th);
-			nsk = tcp_check_req(sk, skb, req, false);
+			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
 		}
 		if (!nsk) {
 			reqsk_put(req);
+			if (req_stolen) {
+				/* Another cpu got exclusive access to req
+				 * and created a full blown socket.
+				 * Try to feed this packet to this socket
+				 * instead of discarding it.
+				 */
+				tcp_v6_restore_cb(skb);
+				sock_put(sk);
+				goto lookup;
+			}
 			goto discard_and_relse;
 		}
 		if (nsk == sk) {
@@ -1996,6 +2007,7 @@ static struct pernet_operations tcpv6_net_ops = {
 	.init = tcpv6_net_init,
 	.exit = tcpv6_net_exit,
 	.exit_batch = tcpv6_net_exit_batch,
+	.async = true,
 };
 
 int __init tcpv6_init(void)
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 14ae32bb1f3d..f3839780dc31 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -123,6 +123,7 @@ static void __net_exit udplite6_proc_exit_net(struct net *net)
 static struct pernet_operations udplite6_net_ops = {
 	.init = udplite6_proc_init_net,
 	.exit = udplite6_proc_exit_net,
+	.async = true,
 };
 
 int __init udplite6_proc_init(void)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 09fb44ee3b45..88cd0c90fa81 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -395,6 +395,7 @@ static void __net_exit xfrm6_net_exit(struct net *net)
 static struct pernet_operations xfrm6_net_ops = {
 	.init = xfrm6_net_init,
 	.exit = xfrm6_net_exit,
+	.async = true,
 };
 
 int __init xfrm6_init(void)
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 1e8cc7bcbca3..81ce15ffb878 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -989,14 +989,13 @@ done:
 }
 
 static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
-			     int *len, int peer)
+			     int peer)
 {
 	struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr;
 	struct sock *sk = sock->sk;
 	struct iucv_sock *iucv = iucv_sk(sk);
 
 	addr->sa_family = AF_IUCV;
-	*len = sizeof(struct sockaddr_iucv);
 
 	if (peer) {
 		memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8);
@@ -1009,7 +1008,7 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
 	memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr));
 	memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
 
-	return 0;
+	return sizeof(struct sockaddr_iucv);
 }
 
 /**
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index f297d53a11aa..435594648dac 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1417,6 +1417,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
 	 */
 	if (csk->sk_user_data) {
 		write_unlock_bh(&csk->sk_callback_lock);
+		strp_stop(&psock->strp);
 		strp_done(&psock->strp);
 		kmem_cache_free(kcm_psockp, psock);
 		return -EALREADY;
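
The kcm fix respects the strparser teardown contract: strp_done() expects a parser that has already been stopped, so an error path that unwinds after strp_init() must call strp_stop() first. Sketched as a tiny helper, under the assumption that the contract is as the strparser documentation describes:

static void psock_abort_strp(struct strparser *strp)
{
	strp_stop(strp);	/* mark the parser stopped, no further rx */
	strp_done(strp);	/* safe now: done requires a stopped parser */
}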
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index ff61124fdf59..4614585e1720 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -349,7 +349,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags)
 }
 
 static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
-			   int *uaddr_len, int peer)
+			   int peer)
 {
 	struct sock *sk = sock->sk;
 	struct inet_sock *inet = inet_sk(sk);
@@ -370,8 +370,7 @@ static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
 		lsa->l2tp_conn_id = lsk->conn_id;
 		lsa->l2tp_addr.s_addr = addr;
 	}
-	*uaddr_len = sizeof(*lsa);
-	return 0;
+	return sizeof(*lsa);
 }
 
 static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 192344688c06..efea58b66295 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -421,7 +421,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags)
 }
 
 static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
-			    int *uaddr_len, int peer)
+			    int peer)
 {
 	struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)uaddr;
 	struct sock *sk = sock->sk;
@@ -449,8 +449,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
 	}
 	if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
 		lsa->l2tp_scope_id = sk->sk_bound_dev_if;
-	*uaddr_len = sizeof(*lsa);
-	return 0;
+	return sizeof(*lsa);
 }
 
 static int l2tp_ip6_backlog_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 59f246d7b290..99a03c72db4f 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -870,7 +870,7 @@ err:
 /* getname() support.
  */
 static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
-			    int *usockaddr_len, int peer)
+			    int peer)
 {
 	int len = 0;
 	int error = 0;
@@ -969,8 +969,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 		memcpy(uaddr, &sp, len);
 	}
 
-	*usockaddr_len = len;
-	error = 0;
+	error = len;
 
 	sock_put(sk);
 end:
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index c38d16f22d2a..01dcc0823d1f 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -971,7 +971,7 @@ release:
  * Return the address information of a socket.
  */
 static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
-			  int *uaddrlen, int peer)
+			  int peer)
 {
 	struct sockaddr_llc sllc;
 	struct sock *sk = sock->sk;
@@ -982,7 +982,6 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
 	lock_sock(sk);
 	if (sock_flag(sk, SOCK_ZAPPED))
 		goto out;
-	*uaddrlen = sizeof(sllc);
 	if (peer) {
 		rc = -ENOTCONN;
 		if (sk->sk_state != TCP_ESTABLISHED)
@@ -1003,9 +1002,9 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
 			       IFHWADDRLEN);
 		}
 	}
-	rc = 0;
 	sllc.sllc_family = AF_LLC;
 	memcpy(uaddr, &sllc, sizeof(sllc));
+	rc = sizeof(sllc);
 out:
 	release_sock(sk);
 	return rc;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 0f6b8172fb9a..d72cc786c7b7 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -629,6 +629,7 @@ static void __net_exit netfilter_net_exit(struct net *net)
 static struct pernet_operations netfilter_net_ops = {
 	.init = netfilter_net_init,
 	.exit = netfilter_net_exit,
+	.async = true,
 };
 
 int __init netfilter_init(void)
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index c2c1b16b7538..1ba3da51050d 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -577,6 +577,7 @@ static void __net_exit nf_log_net_exit(struct net *net)
 static struct pernet_operations nf_log_net_ops = {
 	.init = nf_log_net_init,
 	.exit = nf_log_net_exit,
+	.async = true,
 };
 
 int __init netfilter_log_init(void)
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 2f685ee1f9c8..a6a435d7c8f4 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1765,6 +1765,7 @@ static void __net_exit xt_net_exit(struct net *net)
 static struct pernet_operations xt_net_ops = {
 	.init = xt_net_init,
 	.exit = xt_net_exit,
+	.async = true,
 };
 
 static int __init xt_init(void)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2ad445c1d27c..63cb55d3c2fd 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -253,6 +253,7 @@ static struct pernet_operations netlink_tap_net_ops = {
 	.exit = netlink_tap_exit_net,
 	.id   = &netlink_tap_net_id,
 	.size = sizeof(struct netlink_tap_net),
+	.async = true,
 };
 
 static bool netlink_filter_tap(const struct sk_buff *skb)
@@ -1105,7 +1106,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
 }
 
 static int netlink_getname(struct socket *sock, struct sockaddr *addr,
-			   int *addr_len, int peer)
+			   int peer)
 {
 	struct sock *sk = sock->sk;
 	struct netlink_sock *nlk = nlk_sk(sk);
@@ -1113,7 +1114,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
 
 	nladdr->nl_family = AF_NETLINK;
 	nladdr->nl_pad = 0;
-	*addr_len = sizeof(*nladdr);
 
 	if (peer) {
 		nladdr->nl_pid = nlk->dst_portid;
@@ -1124,7 +1124,7 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
 		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
 		netlink_unlock_table();
 	}
-	return 0;
+	return sizeof(*nladdr);
 }
 
 static int netlink_ioctl(struct socket *sock, unsigned int cmd,
@@ -2724,6 +2724,7 @@ static void __init netlink_add_usersock_entry(void)
 static struct pernet_operations __net_initdata netlink_net_ops = {
 	.init = netlink_net_init,
 	.exit = netlink_net_exit,
+	.async = true,
 };
 
 static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 6f02499ef007..a6f63a5faee7 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1035,6 +1035,7 @@ static void __net_exit genl_pernet_exit(struct net *net)
 static struct pernet_operations genl_pernet_ops = {
 	.init = genl_pernet_init,
 	.exit = genl_pernet_exit,
+	.async = true,
 };
 
 static int __init genl_init(void)
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 9ba30c63be3d..35bb6807927f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -829,11 +829,12 @@ out_release:
 }
 
 static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
-	int *uaddr_len, int peer)
+	int peer)
 {
 	struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr;
 	struct sock *sk = sock->sk;
 	struct nr_sock *nr = nr_sk(sk);
+	int uaddr_len;
 
 	memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25));
 
@@ -848,16 +849,16 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
848 sax->fsa_ax25.sax25_call = nr->user_addr; 849 sax->fsa_ax25.sax25_call = nr->user_addr;
849 memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater)); 850 memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater));
850 sax->fsa_digipeater[0] = nr->dest_addr; 851 sax->fsa_digipeater[0] = nr->dest_addr;
851 *uaddr_len = sizeof(struct full_sockaddr_ax25); 852 uaddr_len = sizeof(struct full_sockaddr_ax25);
852 } else { 853 } else {
853 sax->fsa_ax25.sax25_family = AF_NETROM; 854 sax->fsa_ax25.sax25_family = AF_NETROM;
854 sax->fsa_ax25.sax25_ndigis = 0; 855 sax->fsa_ax25.sax25_ndigis = 0;
855 sax->fsa_ax25.sax25_call = nr->source_addr; 856 sax->fsa_ax25.sax25_call = nr->source_addr;
856 *uaddr_len = sizeof(struct sockaddr_ax25); 857 uaddr_len = sizeof(struct sockaddr_ax25);
857 } 858 }
858 release_sock(sk); 859 release_sock(sk);
859 860
860 return 0; 861 return uaddr_len;
861} 862}
862 863
863int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) 864int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 367d8c027101..2ceefa183cee 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -149,6 +149,10 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
 
 	pr_debug("uri: %s, len: %zu\n", uri, uri_len);
 
+	/* sdreq->tlv_len is u8, takes uri_len, + 3 for header, + 1 for NULL */
+	if (WARN_ON_ONCE(uri_len > U8_MAX - 4))
+		return NULL;
+
 	sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL);
 	if (sdreq == NULL)
 		return NULL;
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 376040092142..ea0c0c6f1874 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -497,7 +497,7 @@ error:
 }
 
 static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
-			     int *len, int peer)
+			     int peer)
 {
 	struct sock *sk = sock->sk;
 	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -510,7 +510,6 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
 		 llcp_sock->dsap, llcp_sock->ssap);
 
 	memset(llcp_addr, 0, sizeof(*llcp_addr));
-	*len = sizeof(struct sockaddr_nfc_llcp);
 
 	lock_sock(sk);
 	if (!llcp_sock->dev) {
@@ -528,7 +527,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
 	       llcp_addr->service_name_len);
 	release_sock(sk);
 
-	return 0;
+	return sizeof(struct sockaddr_nfc_llcp);
 }
 
 static inline __poll_t llcp_accept_poll(struct sock *parent)
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index c0b83dc9d993..f018eafc2a0d 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -61,7 +61,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
 };
 
 static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = {
-	[NFC_SDP_ATTR_URI] = { .type = NLA_STRING },
+	[NFC_SDP_ATTR_URI] = { .type = NLA_STRING,
+			       .len = U8_MAX - 4 },
 	[NFC_SDP_ATTR_SAP] = { .type = NLA_U8 },
 };
 
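Both NFC hunks close the same overflow: nfc_llcp_build_sdreq_tlv() stores uri_len plus a 3-byte TLV header and a NUL terminator in a u8 tlv_len field, so a URI longer than U8_MAX - 4 would wrap it. The policy change rejects oversized NFC_SDP_ATTR_URI attributes at netlink parse time, while the WARN_ON_ONCE covers in-kernel callers. The bound, restated as a hedged helper (LLCP_SDREQ_OVERHEAD is a made-up name for the header-plus-NUL overhead):

#include <linux/kernel.h>

#define LLCP_SDREQ_OVERHEAD	4	/* 3 TLV header bytes + 1 NUL */

static bool llcp_sdreq_uri_len_ok(size_t uri_len)
{
	/* tlv_len is a u8, so uri_len + 4 must still fit in 0..255 */
	return uri_len <= U8_MAX - LLCP_SDREQ_OVERHEAD;
}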
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e0f3f4aeeb4f..2c5a6fe5d749 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3409,7 +3409,7 @@ out:
 }
 
 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
-			       int *uaddr_len, int peer)
+			       int peer)
 {
 	struct net_device *dev;
 	struct sock *sk = sock->sk;
@@ -3424,13 +3424,12 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
 	if (dev)
 		strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
 	rcu_read_unlock();
-	*uaddr_len = sizeof(*uaddr);
 
-	return 0;
+	return sizeof(*uaddr);
 }
 
 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
-			  int *uaddr_len, int peer)
+			  int peer)
 {
 	struct net_device *dev;
 	struct sock *sk = sock->sk;
@@ -3455,9 +3454,8 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
 		sll->sll_halen = 0;
 	}
 	rcu_read_unlock();
-	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
 
-	return 0;
+	return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
 }
 
 static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
@@ -4559,6 +4557,7 @@ static void __net_exit packet_net_exit(struct net *net)
 static struct pernet_operations packet_net_ops = {
 	.init = packet_net_init,
 	.exit = packet_net_exit,
+	.async = true,
 };
 
 
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index fffcd69f63ff..f9b40e6a18a5 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -326,7 +326,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock,
 }
 
 static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
-			     int *sockaddr_len, int peer)
+			     int peer)
 {
 	struct sock *sk = sock->sk;
 	struct pn_sock *pn = pn_sk(sk);
@@ -337,8 +337,7 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
 	pn_sockaddr_set_object((struct sockaddr_pn *)addr,
 			       pn->sobject);
 
-	*sockaddr_len = sizeof(struct sockaddr_pn);
-	return 0;
+	return sizeof(struct sockaddr_pn);
 }
 
 static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
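Every getname() hunk in this series is the same mechanical conversion: instead of returning 0 and passing the address length back through an int * out-parameter, the handler now returns the length itself, with negative values reserved for errors. That removes an output callers routinely forgot to check or initialize. A hedged sketch of a caller under the new convention; query_local_port() is a hypothetical helper, not a function from this patch:

#include <linux/net.h>
#include <linux/socket.h>
#include <linux/in.h>

static int query_local_port(struct socket *sock, __be16 *port)
{
	struct sockaddr_storage addr;
	int len;

	/* old style: err = sock->ops->getname(sock, uaddr, &len, 0);
	 * new style: the length (or a negative errno) is the return value.
	 */
	len = sock->ops->getname(sock, (struct sockaddr *)&addr, 0);
	if (len < 0)
		return len;

	if (len >= sizeof(struct sockaddr_in))
		*port = ((struct sockaddr_in *)&addr)->sin_port;
	return 0;
}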
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 5fb3929e3d7d..b33e5aeb4c06 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -893,7 +893,7 @@ static int qrtr_connect(struct socket *sock, struct sockaddr *saddr,
 }
 
 static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
-			int *len, int peer)
+			int peer)
 {
 	struct qrtr_sock *ipc = qrtr_sk(sock->sk);
 	struct sockaddr_qrtr qaddr;
@@ -912,12 +912,11 @@ static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
 	}
 	release_sock(sk);
 
-	*len = sizeof(qaddr);
 	qaddr.sq_family = AF_QIPCRTR;
 
 	memcpy(saddr, &qaddr, sizeof(qaddr));
 
-	return 0;
+	return sizeof(qaddr);
 }
 
 static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 744c637c86b0..a937f18896ae 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -110,7 +110,7 @@ void rds_wake_sk_sleep(struct rds_sock *rs)
 }
 
 static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
-		       int *uaddr_len, int peer)
+		       int peer)
 {
 	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
 	struct rds_sock *rs = rds_sk_to_rs(sock->sk);
@@ -131,8 +131,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
 
 	sin->sin_family = AF_INET;
 
-	*uaddr_len = sizeof(*sin);
-	return 0;
+	return sizeof(*sin);
 }
 
 /*
@@ -183,6 +182,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
 		mask |= (EPOLLIN | EPOLLRDNORM);
 	if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
 		mask |= (EPOLLOUT | EPOLLWRNORM);
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		mask |= POLLERR;
 	read_unlock_irqrestore(&rs->rs_recv_lock, flags);
 
 	/* clear state any time we wake a seen-congested socket */
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 94e190febfdd..2da3176bf792 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -224,7 +224,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 		if (rds_destroy_pending(conn))
 			ret = -ENETDOWN;
 		else
-			ret = trans->conn_alloc(conn, gfp);
+			ret = trans->conn_alloc(conn, GFP_ATOMIC);
 		if (ret) {
 			rcu_read_unlock();
 			kfree(conn->c_path);
diff --git a/net/rds/message.c b/net/rds/message.c
index 4318cc9b78f7..651834513481 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -33,6 +33,9 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/errqueue.h>
 
 #include "rds.h"
 
@@ -53,20 +56,92 @@ void rds_message_addref(struct rds_message *rm)
 }
 EXPORT_SYMBOL_GPL(rds_message_addref);
 
+static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie)
+{
+	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+	int ncookies;
+	u32 *ptr;
+
+	if (serr->ee.ee_origin != SO_EE_ORIGIN_ZCOOKIE)
+		return false;
+	ncookies = serr->ee.ee_data;
+	if (ncookies == SO_EE_ORIGIN_MAX_ZCOOKIES)
+		return false;
+	ptr = skb_put(skb, sizeof(u32));
+	*ptr = cookie;
+	serr->ee.ee_data = ++ncookies;
+	return true;
+}
+
+static void rds_rm_zerocopy_callback(struct rds_sock *rs,
+				     struct rds_znotifier *znotif)
+{
+	struct sock *sk = rds_rs_to_sk(rs);
+	struct sk_buff *skb, *tail;
+	struct sock_exterr_skb *serr;
+	unsigned long flags;
+	struct sk_buff_head *q;
+	u32 cookie = znotif->z_cookie;
+
+	q = &sk->sk_error_queue;
+	spin_lock_irqsave(&q->lock, flags);
+	tail = skb_peek_tail(q);
+
+	if (tail && skb_zcookie_add(tail, cookie)) {
+		spin_unlock_irqrestore(&q->lock, flags);
+		mm_unaccount_pinned_pages(&znotif->z_mmp);
+		consume_skb(rds_skb_from_znotifier(znotif));
+		sk->sk_error_report(sk);
+		return;
+	}
+
+	skb = rds_skb_from_znotifier(znotif);
+	serr = SKB_EXT_ERR(skb);
+	memset(&serr->ee, 0, sizeof(serr->ee));
+	serr->ee.ee_errno = 0;
+	serr->ee.ee_origin = SO_EE_ORIGIN_ZCOOKIE;
+	serr->ee.ee_info = 0;
+	WARN_ON(!skb_zcookie_add(skb, cookie));
+
+	__skb_queue_tail(q, skb);
+
+	spin_unlock_irqrestore(&q->lock, flags);
+	sk->sk_error_report(sk);
+
+	mm_unaccount_pinned_pages(&znotif->z_mmp);
+}
+
 /*
  * This relies on dma_map_sg() not touching sg[].page during merging.
  */
 static void rds_message_purge(struct rds_message *rm)
 {
-	unsigned long i;
+	unsigned long i, flags;
+	bool zcopy = false;
 
 	if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
 		return;
 
+	spin_lock_irqsave(&rm->m_rs_lock, flags);
+	if (rm->m_rs) {
+		struct rds_sock *rs = rm->m_rs;
+
+		if (rm->data.op_mmp_znotifier) {
+			zcopy = true;
+			rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
+			rm->data.op_mmp_znotifier = NULL;
+		}
+		sock_put(rds_rs_to_sk(rs));
+		rm->m_rs = NULL;
+	}
+	spin_unlock_irqrestore(&rm->m_rs_lock, flags);
+
 	for (i = 0; i < rm->data.op_nents; i++) {
-		rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i]));
 		/* XXX will have to put_page for page refs */
-		__free_page(sg_page(&rm->data.op_sg[i]));
+		if (!zcopy)
+			__free_page(sg_page(&rm->data.op_sg[i]));
+		else
+			put_page(sg_page(&rm->data.op_sg[i]));
 	}
 	rm->data.op_nents = 0;
 
@@ -266,12 +341,14 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
 	return rm;
 }
 
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+			       bool zcopy)
 {
 	unsigned long to_copy, nbytes;
 	unsigned long sg_off;
 	struct scatterlist *sg;
 	int ret = 0;
+	int length = iov_iter_count(from);
 
 	rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
 
@@ -281,6 +358,53 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
 	sg = rm->data.op_sg;
 	sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
 
+	if (zcopy) {
+		int total_copied = 0;
+		struct sk_buff *skb;
+
+		skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32),
+				GFP_KERNEL);
+		if (!skb)
+			return -ENOMEM;
+		rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
+		if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
+					    length)) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		while (iov_iter_count(from)) {
+			struct page *pages;
+			size_t start;
+			ssize_t copied;
+
+			copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
+						    1, &start);
+			if (copied < 0) {
+				struct mmpin *mmp;
+				int i;
+
+				for (i = 0; i < rm->data.op_nents; i++)
+					put_page(sg_page(&rm->data.op_sg[i]));
+				mmp = &rm->data.op_mmp_znotifier->z_mmp;
+				mm_unaccount_pinned_pages(mmp);
+				ret = -EFAULT;
+				goto err;
+			}
+			total_copied += copied;
+			iov_iter_advance(from, copied);
+			length -= copied;
+			sg_set_page(sg, pages, copied, start);
+			rm->data.op_nents++;
+			sg++;
+		}
+		WARN_ON_ONCE(length != 0);
+		return ret;
+err:
+		consume_skb(skb);
+		rm->data.op_mmp_znotifier = NULL;
+		return ret;
+	} /* zcopy */
+
 	while (iov_iter_count(from)) {
 		if (!sg_page(sg)) {
 			ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
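rds_rm_zerocopy_callback() above batches completion notifications: before allocating a fresh skb it tries to append the new u32 cookie to the skb already queued at the tail of the socket error queue, up to SO_EE_ORIGIN_MAX_ZCOOKIES cookies per notification, so a single wakeup can report many finished sends. A hedged userspace sketch of draining those notifications follows, assuming (from the kernel code above, not from documented ABI) that the cookies arrive as the u32 payload of an error-queue message whose sock_extended_err has ee_origin == SO_EE_ORIGIN_ZCOOKIE and the cookie count in ee_data:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/errqueue.h>

static int drain_zcookies(int fd)
{
	uint32_t cookies[16];	/* comfortably >= SO_EE_ORIGIN_MAX_ZCOOKIES */
	char control[CMSG_SPACE(sizeof(struct sock_extended_err))];
	struct iovec iov = { .iov_base = cookies, .iov_len = sizeof(cookies) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = control, .msg_controllen = sizeof(control),
	};
	struct cmsghdr *cm;
	uint32_t i;
	ssize_t n;

	n = recvmsg(fd, &msg, MSG_ERRQUEUE | MSG_DONTWAIT);
	if (n < 0)
		return errno == EAGAIN ? 0 : -1;

	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		struct sock_extended_err ee;

		memcpy(&ee, CMSG_DATA(cm), sizeof(ee));
		if (ee.ee_origin != SO_EE_ORIGIN_ZCOOKIE)
			continue;
		for (i = 0; i < ee.ee_data && i < 16; i++)
			printf("send %u completed\n", cookies[i]);
	}
	return 0;
}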
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 7301b9b01890..31cd38852050 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -356,6 +356,19 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
 #define RDS_MSG_PAGEVEC		7
 #define RDS_MSG_FLUSH		8
 
+struct rds_znotifier {
+	struct list_head	z_list;
+	struct mmpin		z_mmp;
+	u32			z_cookie;
+};
+
+#define RDS_ZCOPY_SKB(__skb) ((struct rds_znotifier *)&((__skb)->cb[0]))
+
+static inline struct sk_buff *rds_skb_from_znotifier(struct rds_znotifier *z)
+{
+	return container_of((void *)z, struct sk_buff, cb);
+}
+
 struct rds_message {
 	refcount_t		m_refcount;
 	struct list_head	m_sock_item;
@@ -436,6 +449,7 @@ struct rds_message {
 			unsigned int		op_count;
 			unsigned int		op_dmasg;
 			unsigned int		op_dmaoff;
+			struct rds_znotifier	*op_mmp_znotifier;
 			struct scatterlist	*op_sg;
 		} data;
 	};
@@ -771,7 +785,8 @@ rds_conn_connecting(struct rds_connection *conn)
 /* message.c */
 struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
 struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from);
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+			       bool zcopy);
 struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
 void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
 				 __be16 dport, u64 seq);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index b25bcfe411ca..b080961464df 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -594,6 +594,8 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
 	if (msg_flags & MSG_OOB)
 		goto out;
+	if (msg_flags & MSG_ERRQUEUE)
+		return sock_recv_errqueue(sk, msg, size, SOL_IP, IP_RECVERR);
 
 	while (1) {
 		/* If there are pending notifications, do those - and nothing else */
diff --git a/net/rds/send.c b/net/rds/send.c
index b1b0022b8370..79d158b3def0 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -649,7 +649,6 @@ static void rds_send_remove_from_sock(struct list_head *messages, int status)
 				rm->rdma.op_notifier = NULL;
 			}
 			was_on_sock = 1;
-			rm->m_rs = NULL;
 		}
 		spin_unlock(&rs->rs_lock);
 
@@ -756,9 +755,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 		 */
 		if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
 			spin_unlock_irqrestore(&cp->cp_lock, flags);
-			spin_lock_irqsave(&rm->m_rs_lock, flags);
-			rm->m_rs = NULL;
-			spin_unlock_irqrestore(&rm->m_rs_lock, flags);
 			continue;
 		}
 		list_del_init(&rm->m_conn_item);
@@ -774,7 +770,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 		__rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
 		spin_unlock(&rs->rs_lock);
 
-		rm->m_rs = NULL;
 		spin_unlock_irqrestore(&rm->m_rs_lock, flags);
 
 		rds_message_put(rm);
@@ -798,7 +793,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 		__rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
 		spin_unlock(&rs->rs_lock);
 
-		rm->m_rs = NULL;
 		spin_unlock_irqrestore(&rm->m_rs_lock, flags);
 
 		rds_message_put(rm);
@@ -849,6 +843,7 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
 		list_add_tail(&rm->m_sock_item, &rs->rs_send_queue);
 		set_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
 		rds_message_addref(rm);
+		sock_hold(rds_rs_to_sk(rs));
 		rm->m_rs = rs;
 
 		/* The code ordering is a little weird, but we're
@@ -880,12 +875,13 @@ out:
 * rds_message is getting to be quite complicated, and we'd like to allocate
 * it all in one go. This figures out how big it needs to be up front.
 */
-static int rds_rm_size(struct msghdr *msg, int data_len)
+static int rds_rm_size(struct msghdr *msg, int num_sgs)
 {
 	struct cmsghdr *cmsg;
 	int size = 0;
 	int cmsg_groups = 0;
 	int retval;
+	bool zcopy_cookie = false;
 
 	for_each_cmsghdr(cmsg, msg) {
 		if (!CMSG_OK(msg, cmsg))
@@ -904,6 +900,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
 
 			break;
 
+		case RDS_CMSG_ZCOPY_COOKIE:
+			zcopy_cookie = true;
+			/* fall through */
+
 		case RDS_CMSG_RDMA_DEST:
 		case RDS_CMSG_RDMA_MAP:
 			cmsg_groups |= 2;
@@ -924,7 +924,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
 
 	}
 
-	size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
+	if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie)
+		return -EINVAL;
+
+	size += num_sgs * sizeof(struct scatterlist);
 
 	/* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
 	if (cmsg_groups == 3)
@@ -933,6 +936,18 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
 	return size;
 }
 
+static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm,
+			  struct cmsghdr *cmsg)
+{
+	u32 *cookie;
+
+	if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)))
+		return -EINVAL;
+	cookie = CMSG_DATA(cmsg);
+	rm->data.op_mmp_znotifier->z_cookie = *cookie;
+	return 0;
+}
+
 static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
 			 struct msghdr *msg, int *allocated_mr)
 {
@@ -975,6 +990,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
 			ret = rds_cmsg_atomic(rs, rm, cmsg);
 			break;
 
+		case RDS_CMSG_ZCOPY_COOKIE:
+			ret = rds_cmsg_zcopy(rs, rm, cmsg);
+			break;
+
 		default:
 			return -EINVAL;
 		}
@@ -1045,10 +1064,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 	long timeo = sock_sndtimeo(sk, nonblock);
 	struct rds_conn_path *cpath;
 	size_t total_payload_len = payload_len, rdma_payload_len = 0;
+	bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
+		      sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
+	int num_sgs = ceil(payload_len, PAGE_SIZE);
 
 	/* Mirror Linux UDP mirror of BSD error message compatibility */
 	/* XXX: Perhaps MSG_MORE someday */
-	if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
+	if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) {
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
@@ -1092,8 +1114,15 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 		goto out;
 	}
 
+	if (zcopy) {
+		if (rs->rs_transport->t_type != RDS_TRANS_TCP) {
+			ret = -EOPNOTSUPP;
+			goto out;
+		}
+		num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
+	}
 	/* size of rm including all sgs */
-	ret = rds_rm_size(msg, payload_len);
+	ret = rds_rm_size(msg, num_sgs);
 	if (ret < 0)
 		goto out;
 
@@ -1105,12 +1134,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 
 	/* Attach data to the rm */
 	if (payload_len) {
-		rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
+		rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
 		if (!rm->data.op_sg) {
 			ret = -ENOMEM;
 			goto out;
 		}
-		ret = rds_message_copy_from_user(rm, &msg->msg_iter);
+		ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy);
 		if (ret)
 			goto out;
 	}
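Taken together, the send-path hooks define the opt-in: the socket must have SOCK_ZEROCOPY set (via the SO_ZEROCOPY socket option), the transport must be RDS over TCP, and a sendmsg() carrying MSG_ZEROCOPY must also carry an RDS_CMSG_ZCOPY_COOKIE control message, since rds_rm_size() rejects MSG_ZEROCOPY without a cookie. A hedged sketch of such a send; the guarded fallback defines are assumptions for older userspace headers, and error handling is elided:

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/rds.h>

#ifndef SOL_RDS
#define SOL_RDS 276		/* assumption: matches the kernel's value */
#endif
#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60		/* asm-generic value; assumption */
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000	/* assumption for older headers */
#endif

static ssize_t rds_send_zcopy(int fd, const void *buf, size_t len,
			      uint32_t cookie)
{
	char control[CMSG_SPACE(sizeof(uint32_t))];
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = control, .msg_controllen = sizeof(control),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
	int one = 1;

	/* without SOCK_ZEROCOPY, rds_sendmsg() falls back to copying */
	setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));

	cm->cmsg_level = SOL_RDS;
	cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;	/* required with MSG_ZEROCOPY */
	cm->cmsg_len = CMSG_LEN(sizeof(cookie));
	memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));

	return sendmsg(fd, &msg, MSG_ZEROCOPY);
}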
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 44c4652721af..08230a145042 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -227,7 +227,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
 	struct rds_tcp_connection *tc;
 	unsigned long flags;
 	struct sockaddr_in sin;
-	int sinlen;
 	struct socket *sock;
 
 	spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
@@ -239,12 +238,10 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
 
 		sock = tc->t_sock;
 		if (sock) {
-			sock->ops->getname(sock, (struct sockaddr *)&sin,
-					   &sinlen, 0);
+			sock->ops->getname(sock, (struct sockaddr *)&sin, 0);
 			tsinfo.local_addr = sin.sin_addr.s_addr;
 			tsinfo.local_port = sin.sin_port;
-			sock->ops->getname(sock, (struct sockaddr *)&sin,
-					   &sinlen, 1);
+			sock->ops->getname(sock, (struct sockaddr *)&sin, 1);
 			tsinfo.peer_addr = sin.sin_addr.s_addr;
 			tsinfo.peer_port = sin.sin_port;
 		}
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 083bd251406f..5170373b797c 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -938,7 +938,7 @@ out_release:
 }
 
 static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
-	int *uaddr_len, int peer)
+	int peer)
 {
 	struct full_sockaddr_rose *srose = (struct full_sockaddr_rose *)uaddr;
 	struct sock *sk = sock->sk;
@@ -964,8 +964,7 @@ static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
 			srose->srose_digis[n] = rose->source_digis[n];
 	}
 
-	*uaddr_len = sizeof(struct full_sockaddr_rose);
-	return 0;
+	return sizeof(struct full_sockaddr_rose);
 }
 
 int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct rose_neigh *neigh, unsigned int lci)
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index cc21e8db25b0..9d45d8b56744 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -517,9 +517,10 @@ try_again:
 		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
 			       sizeof(unsigned int), &id32);
 	} else {
+		unsigned long idl = call->user_call_ID;
+
 		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
-			       sizeof(unsigned long),
-			       &call->user_call_ID);
+			       sizeof(unsigned long), &idl);
 	}
 	if (ret < 0)
 		goto error_unlock_call;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f24a6ae6819a..a01169fb5325 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -658,6 +658,18 @@ config NET_EMATCH_IPSET
 	  To compile this code as a module, choose M here: the
 	  module will be called em_ipset.
 
+config NET_EMATCH_IPT
+	tristate "IPtables Matches"
+	depends on NET_EMATCH && NETFILTER && NETFILTER_XTABLES
+	---help---
+	  Say Y here to be able to classify packets based on iptables
+	  matches.
+	  Current supported match is "policy" which allows packet classification
+	  based on IPsec policy that was used during decapsulation
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_ipt.
+
 config NET_CLS_ACT
 	bool "Actions"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 5b635447e3f8..8811d3804878 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -75,3 +75,4 @@ obj-$(CONFIG_NET_EMATCH_META) += em_meta.o
 obj-$(CONFIG_NET_EMATCH_TEXT)	+= em_text.o
 obj-$(CONFIG_NET_EMATCH_CANID)	+= em_canid.o
 obj-$(CONFIG_NET_EMATCH_IPSET)	+= em_ipset.o
+obj-$(CONFIG_NET_EMATCH_IPT)	+= em_ipt.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index eba6682727dd..1f65d6ada9ff 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -202,7 +202,8 @@ nla_put_failure:
 
 int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 		       struct netlink_callback *cb, int type,
-		       const struct tc_action_ops *ops)
+		       const struct tc_action_ops *ops,
+		       struct netlink_ext_ack *extack)
 {
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
@@ -211,7 +212,8 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 	} else if (type == RTM_GETACTION) {
 		return tcf_dump_walker(idrinfo, skb, cb);
 	} else {
-		WARN(1, "tcf_generic_walker: unknown action %d\n", type);
+		WARN(1, "tcf_generic_walker: unknown command %d\n", type);
+		NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command");
 		return -EINVAL;
 	}
 }
@@ -605,7 +607,8 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
 
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
-				    char *name, int ovr, int bind)
+				    char *name, int ovr, int bind,
+				    struct netlink_ext_ack *extack)
 {
 	struct tc_action *a;
 	struct tc_action_ops *a_o;
@@ -616,31 +619,40 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	int err;
 
 	if (name == NULL) {
-		err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+		err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
 		if (err < 0)
 			goto err_out;
 		err = -EINVAL;
 		kind = tb[TCA_ACT_KIND];
-		if (kind == NULL)
+		if (!kind) {
+			NL_SET_ERR_MSG(extack, "TC action kind must be specified");
 			goto err_out;
-		if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
+		}
+		if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) {
+			NL_SET_ERR_MSG(extack, "TC action name too long");
 			goto err_out;
+		}
 		if (tb[TCA_ACT_COOKIE]) {
 			int cklen = nla_len(tb[TCA_ACT_COOKIE]);
 
-			if (cklen > TC_COOKIE_MAX_SIZE)
+			if (cklen > TC_COOKIE_MAX_SIZE) {
+				NL_SET_ERR_MSG(extack, "TC cookie size above the maximum");
 				goto err_out;
+			}
 
 			cookie = nla_memdup_cookie(tb);
 			if (!cookie) {
+				NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
 				err = -ENOMEM;
 				goto err_out;
 			}
 		}
 	} else {
-		err = -EINVAL;
-		if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
+		if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
+			NL_SET_ERR_MSG(extack, "TC action name too long");
+			err = -EINVAL;
 			goto err_out;
+		}
 	}
 
 	a_o = tc_lookup_action_n(act_name);
@@ -663,15 +675,17 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 			goto err_mod;
 		}
 #endif
+		NL_SET_ERR_MSG(extack, "Failed to load TC action module");
 		err = -ENOENT;
 		goto err_out;
 	}
 
 	/* backward compatibility for policer */
 	if (name == NULL)
-		err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind);
+		err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
+				extack);
 	else
-		err = a_o->init(net, nla, est, &a, ovr, bind);
+		err = a_o->init(net, nla, est, &a, ovr, bind, extack);
 	if (err < 0)
 		goto err_mod;
 
@@ -697,6 +711,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 
 			list_add_tail(&a->list, &actions);
 			tcf_action_destroy(&actions, bind);
+			NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
 			return ERR_PTR(err);
 		}
 	}
@@ -726,19 +741,20 @@ static void cleanup_a(struct list_head *actions, int ovr)
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions)
+		    struct list_head *actions, struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
 	int err;
 	int i;
 
-	err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+	err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
 	if (err < 0)
 		return err;
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind);
+		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
+					extack);
 		if (IS_ERR(act)) {
 			err = PTR_ERR(act);
 			goto err;
@@ -822,7 +838,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
 	t->tca__pad2 = 0;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
-	if (nest == NULL)
+	if (!nest)
 		goto out_nlmsg_trim;
 
 	if (tcf_action_dump(skb, actions, bind, ref) < 0)
@@ -840,7 +856,8 @@ out_nlmsg_trim:
 
 static int
 tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
-	       struct list_head *actions, int event)
+	       struct list_head *actions, int event,
+	       struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
 
@@ -849,6 +866,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
 		return -ENOBUFS;
 	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
 			 0, 0) <= 0) {
+		NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -857,7 +875,8 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
 }
 
 static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
-					  struct nlmsghdr *n, u32 portid)
+					  struct nlmsghdr *n, u32 portid,
+					  struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_ACT_MAX + 1];
 	const struct tc_action_ops *ops;
@@ -865,22 +884,26 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
 	int index;
 	int err;
 
-	err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+	err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
 	if (err < 0)
 		goto err_out;
 
 	err = -EINVAL;
 	if (tb[TCA_ACT_INDEX] == NULL ||
-	    nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
+	    nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) {
+		NL_SET_ERR_MSG(extack, "Invalid TC action index value");
 		goto err_out;
+	}
 	index = nla_get_u32(tb[TCA_ACT_INDEX]);
 
 	err = -EINVAL;
 	ops = tc_lookup_action(tb[TCA_ACT_KIND]);
-	if (!ops) /* could happen in batch of actions */
+	if (!ops) { /* could happen in batch of actions */
+		NL_SET_ERR_MSG(extack, "Specified TC action not found");
 		goto err_out;
+	}
 	err = -ENOENT;
-	if (ops->lookup(net, &a, index) == 0)
+	if (ops->lookup(net, &a, index, extack) == 0)
 		goto err_mod;
 
 	module_put(ops->owner);
@@ -893,7 +916,8 @@ err_out:
 }
 
 static int tca_action_flush(struct net *net, struct nlattr *nla,
-			    struct nlmsghdr *n, u32 portid)
+			    struct nlmsghdr *n, u32 portid,
+			    struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
 	unsigned char *b;
@@ -907,39 +931,45 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	int err = -ENOMEM;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb) {
-		pr_debug("tca_action_flush: failed skb alloc\n");
+	if (!skb)
 		return err;
-	}
 
 	b = skb_tail_pointer(skb);
 
-	err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+	err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
 	if (err < 0)
 		goto err_out;
 
 	err = -EINVAL;
 	kind = tb[TCA_ACT_KIND];
 	ops = tc_lookup_action(kind);
-	if (!ops) /*some idjot trying to flush unknown action */
+	if (!ops) { /*some idjot trying to flush unknown action */
+		NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action");
 		goto err_out;
+	}
 
 	nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
 			sizeof(*t), 0);
-	if (!nlh)
+	if (!nlh) {
+		NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification");
 		goto out_module_put;
+	}
 	t = nlmsg_data(nlh);
 	t->tca_family = AF_UNSPEC;
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
-	if (nest == NULL)
+	if (!nest) {
+		NL_SET_ERR_MSG(extack, "Failed to add new netlink message");
 		goto out_module_put;
+	}
 
-	err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
-	if (err <= 0)
+	err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops, extack);
+	if (err <= 0) {
+		nla_nest_cancel(skb, nest);
 		goto out_module_put;
+	}
 
 	nla_nest_end(skb, nest);
 
@@ -950,6 +980,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 			     n->nlmsg_flags & NLM_F_ECHO);
 	if (err > 0)
 		return 0;
+	if (err < 0)
+		NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");
 
 	return err;
 
@@ -962,7 +994,7 @@ err_out:
 
 static int
 tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-	       u32 portid)
+	       u32 portid, struct netlink_ext_ack *extack)
 {
 	int ret;
 	struct sk_buff *skb;
@@ -973,6 +1005,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 
 	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
 			 0, 1) <= 0) {
+		NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -980,6 +1013,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 	/* now do the delete */
 	ret = tcf_action_destroy(actions, 0);
 	if (ret < 0) {
+		NL_SET_ERR_MSG(extack, "Failed to delete TC action");
 		kfree_skb(skb);
 		return ret;
 	}
@@ -993,26 +1027,27 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 
 static int
 tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
-	      u32 portid, int event)
+	      u32 portid, int event, struct netlink_ext_ack *extack)
 {
 	int i, ret;
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
 	LIST_HEAD(actions);
 
-	ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+	ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
 	if (ret < 0)
 		return ret;
 
 	if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
-		if (tb[1] != NULL)
-			return tca_action_flush(net, tb[1], n, portid);
-		else
-			return -EINVAL;
+		if (tb[1])
+			return tca_action_flush(net, tb[1], n, portid, extack);
+
+		NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action");
+		return -EINVAL;
 	}
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-		act = tcf_action_get_1(net, tb[i], n, portid);
+		act = tcf_action_get_1(net, tb[i], n, portid, extack);
 		if (IS_ERR(act)) {
 			ret = PTR_ERR(act);
 			goto err;
@@ -1022,9 +1057,9 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	}
 
 	if (event == RTM_GETACTION)
-		ret = tcf_get_notify(net, portid, n, &actions, event);
+		ret = tcf_get_notify(net, portid, n, &actions, event, extack);
 	else { /* delete */
-		ret = tcf_del_notify(net, n, &actions, portid);
+		ret = tcf_del_notify(net, n, &actions, portid, extack);
 		if (ret)
 			goto err;
 		return ret;
@@ -1037,7 +1072,7 @@ err:
 
 static int
 tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-	       u32 portid)
+	       u32 portid, struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
 	int err = 0;
@@ -1048,6 +1083,7 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 
 	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
 			 RTM_NEWACTION, 0, 0) <= 0) {
+		NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while deleting TC action");
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -1060,16 +1096,18 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 }
 
 static int tcf_action_add(struct net *net, struct nlattr *nla,
-			  struct nlmsghdr *n, u32 portid, int ovr)
+			  struct nlmsghdr *n, u32 portid, int ovr,
+			  struct netlink_ext_ack *extack)
 {
 	int ret = 0;
 	LIST_HEAD(actions);
 
-	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions);
+	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
+			      extack);
 	if (ret)
 		return ret;
 
-	return tcf_add_notify(net, n, &actions, portid);
+	return tcf_add_notify(net, n, &actions, portid, extack);
 }
 
 static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
@@ -1097,7 +1135,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 		return ret;
 
 	if (tca[TCA_ACT_TAB] == NULL) {
-		pr_notice("tc_ctl_action: received NO action attribs\n");
+		NL_SET_ERR_MSG(extack, "Netlink action attributes missing");
 		return -EINVAL;
 	}
 
@@ -1113,17 +1151,18 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 		if (n->nlmsg_flags & NLM_F_REPLACE)
 			ovr = 1;
 replay:
-		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr);
+		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+				     extack);
 		if (ret == -EAGAIN)
 			goto replay;
 		break;
 	case RTM_DELACTION:
 		ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
-				    portid, RTM_DELACTION);
+				    portid, RTM_DELACTION, extack);
 		break;
 	case RTM_GETACTION:
 		ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
-				    portid, RTM_GETACTION);
+				    portid, RTM_GETACTION, extack);
 		break;
 	default:
 		BUG();
@@ -1218,7 +1257,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (nest == NULL)
 		goto out_module_put;
 
-	ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o);
+	ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o, NULL);
 	if (ret < 0)
 		goto out_module_put;
 
@@ -1454,6 +1493,7 @@ static struct pernet_operations tcf_action_net_ops = {
 	.exit = tcf_action_net_exit,
 	.id = &tcf_action_net_id,
 	.size = sizeof(struct tcf_action_net),
+	.async = true,
 };
 
 static int __init tc_action_init(void)
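The act_api.c hunks above thread a struct netlink_ext_ack through the action init, lookup, and walk operations, so a failed command can return a human-readable string in the netlink extended ACK instead of only a bare errno or a kernel log line; the per-action files that follow are mechanical signature updates to match. A small hedged sketch of the reporting pattern, using a hypothetical helper rather than code from the patch:

#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <net/netlink.h>

static int demo_parse_index(struct nlattr *tb[], u32 *index,
			    struct netlink_ext_ack *extack)
{
	if (!tb[TCA_ACT_INDEX]) {
		/* the string rides back to userspace in the netlink ACK */
		NL_SET_ERR_MSG(extack, "TC action index missing");
		return -EINVAL;
	}
	*index = nla_get_u32(tb[TCA_ACT_INDEX]);
	return 0;
}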
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index b3f2c15affa7..cb3c5d403c88 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -272,7 +272,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
 
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act,
-			int replace, int bind)
+			int replace, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
@@ -367,14 +367,16 @@ static void tcf_bpf_cleanup(struct tc_action *act)
 
 static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
 			  struct netlink_callback *cb, int type,
-			  const struct tc_action_ops *ops)
+			  const struct tc_action_ops *ops,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2b15ba84e0c8..e4b880fa51fe 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -96,7 +96,8 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
 
 static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 			     struct nlattr *est, struct tc_action **a,
-			     int ovr, int bind)
+			     int ovr, int bind,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 	struct nlattr *tb[TCA_CONNMARK_MAX + 1];
@@ -176,14 +177,16 @@ nla_put_failure:
 
 static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
 			       struct netlink_callback *cb, int type,
-			       const struct tc_action_ops *ops)
+			       const struct tc_action_ops *ops,
+			       struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
+			       struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index b7ba9b06b147..d5c2e528d150 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -46,7 +46,7 @@ static struct tc_action_ops act_csum_ops;
 
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a, int ovr,
-			 int bind)
+			 int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 	struct tcf_csum_params *params_old, *params_new;
@@ -631,14 +631,16 @@ static void tcf_csum_cleanup(struct tc_action *a)
 
 static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
 			   struct netlink_callback *cb, int type,
-			   const struct tc_action_ops *ops)
+			   const struct tc_action_ops *ops,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index b56986d41c87..f072bcf33760 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -56,7 +56,7 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
56 56
57static int tcf_gact_init(struct net *net, struct nlattr *nla, 57static int tcf_gact_init(struct net *net, struct nlattr *nla,
58 struct nlattr *est, struct tc_action **a, 58 struct nlattr *est, struct tc_action **a,
59 int ovr, int bind) 59 int ovr, int bind, struct netlink_ext_ack *extack)
60{ 60{
61 struct tc_action_net *tn = net_generic(net, gact_net_id); 61 struct tc_action_net *tn = net_generic(net, gact_net_id);
62 struct nlattr *tb[TCA_GACT_MAX + 1]; 62 struct nlattr *tb[TCA_GACT_MAX + 1];
@@ -201,14 +201,16 @@ nla_put_failure:
 
 static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
 			   struct netlink_callback *cb, int type,
-			   const struct tc_action_ops *ops)
+			   const struct tc_action_ops *ops,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 5954e992685a..a5994cf0512b 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -447,7 +447,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 
 static int tcf_ife_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a,
-			int ovr, int bind)
+			int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 	struct nlattr *tb[TCA_IFE_MAX + 1];
@@ -824,14 +824,16 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
 
 static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
 			  struct netlink_callback *cb, int type,
-			  const struct tc_action_ops *ops)
+			  const struct tc_action_ops *ops,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 06e380ae0928..9784629090ad 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -193,7 +193,7 @@ err1:
 
 static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a, int ovr,
-			int bind)
+			int bind, struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
 			      bind);
@@ -201,7 +201,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
 		       struct nlattr *est, struct tc_action **a, int ovr,
-		       int bind)
+		       int bind, struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
 			      bind);
@@ -303,14 +303,16 @@ nla_put_failure:
 
 static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
 			  struct netlink_callback *cb, int type,
-			  const struct tc_action_ops *ops)
+			  const struct tc_action_ops *ops,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
@@ -351,14 +353,16 @@ static struct pernet_operations ipt_net_ops = {
 
 static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
 			 struct netlink_callback *cb, int type,
-			 const struct tc_action_ops *ops)
+			 const struct tc_action_ops *ops,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, xt_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, xt_net_id);
 
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index e6ff88f72900..fd34015331ab 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -69,7 +69,7 @@ static struct tc_action_ops act_mirred_ops;
 
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a, int ovr,
-			   int bind)
+			   int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 	struct nlattr *tb[TCA_MIRRED_MAX + 1];
@@ -80,13 +80,17 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	bool exists = false;
 	int ret;
 
-	if (nla == NULL)
+	if (!nla) {
+		NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
 		return -EINVAL;
-	ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL);
+	}
+	ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack);
 	if (ret < 0)
 		return ret;
-	if (tb[TCA_MIRRED_PARMS] == NULL)
+	if (!tb[TCA_MIRRED_PARMS]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters");
 		return -EINVAL;
+	}
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
 	exists = tcf_idr_check(tn, parm->index, a, bind);
@@ -102,6 +106,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	default:
 		if (exists)
 			tcf_idr_release(*a, bind);
+		NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
 		return -EINVAL;
 	}
 	if (parm->ifindex) {
@@ -117,8 +122,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (!exists) {
-		if (dev == NULL)
+		if (!dev) {
+			NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
 			return -EINVAL;
+		}
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_mirred_ops, bind, true);
 		if (ret)
@@ -265,14 +272,16 @@ nla_put_failure:
 
 static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
 			     struct netlink_callback *cb, int type,
-			     const struct tc_action_ops *ops)
+			     const struct tc_action_ops *ops,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
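
act_mirred is where the new argument starts paying off: each rejected configuration now reports why through the extended ack rather than a bare -EINVAL, and nla_parse_nested() receives extack so attribute-level policy violations are reported too. The idiom, sketched with an illustrative attribute name:

	if (!tb[TCA_FOO_PARMS]) {
		/* string travels back to userspace in the netlink ack */
		NL_SET_ERR_MSG_MOD(extack, "Missing required foo parameters");
		return -EINVAL;
	}

NL_SET_ERR_MSG_MOD() prefixes KBUILD_MODNAME to the message; the plain NL_SET_ERR_MSG() variant does not.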
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 98c6a4b2f523..4b5848b6c252 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -37,7 +37,8 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 };
 
 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
-			struct tc_action **a, int ovr, int bind)
+			struct tc_action **a, int ovr, int bind,
+			struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 	struct nlattr *tb[TCA_NAT_MAX + 1];
@@ -277,14 +278,16 @@ nla_put_failure:
 
 static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
 			  struct netlink_callback *cb, int type,
-			  const struct tc_action_ops *ops)
+			  const struct tc_action_ops *ops,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 349beaffb29e..094303c27c5e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -132,7 +132,7 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
 
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 			  struct nlattr *est, struct tc_action **a,
-			  int ovr, int bind)
+			  int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 	struct nlattr *tb[TCA_PEDIT_MAX + 1];
@@ -419,14 +419,16 @@ nla_put_failure:
 
 static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
 			    struct netlink_callback *cb, int type,
-			    const struct tc_action_ops *ops)
+			    const struct tc_action_ops *ops,
+			    struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
+			    struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 95d3c9097b25..ff55bd6c7db0 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -58,11 +58,12 @@ static struct tc_action_ops act_police_ops;
 
 static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
 				 struct netlink_callback *cb, int type,
-				 const struct tc_action_ops *ops)
+				 const struct tc_action_ops *ops,
+				 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
@@ -74,7 +75,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 
 static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 			       struct nlattr *est, struct tc_action **a,
-			       int ovr, int bind)
+			       int ovr, int bind,
+			       struct netlink_ext_ack *extack)
 {
 	int ret = 0, err;
 	struct nlattr *tb[TCA_POLICE_MAX + 1];
@@ -304,7 +306,8 @@ nla_put_failure:
 	return -1;
 }
 
-static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 1ba0df238756..9765145aaf40 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -37,7 +37,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
 
 static int tcf_sample_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a, int ovr,
-			   int bind)
+			   int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 	struct nlattr *tb[TCA_SAMPLE_MAX + 1];
@@ -202,14 +202,16 @@ nla_put_failure:
 
 static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
 			     struct netlink_callback *cb, int type,
-			     const struct tc_action_ops *ops)
+			     const struct tc_action_ops *ops,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 425eac11f6da..8244e221fe4f 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -79,7 +79,7 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
 
 static int tcf_simp_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind)
+			 int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 	struct nlattr *tb[TCA_DEF_MAX + 1];
@@ -170,14 +170,16 @@ nla_put_failure:
 
 static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
 			   struct netlink_callback *cb, int type,
-			   const struct tc_action_ops *ops)
+			   const struct tc_action_ops *ops,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 5a3f691bb545..ddf69fc01bdf 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -66,7 +66,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 			    struct nlattr *est, struct tc_action **a,
-			    int ovr, int bind)
+			    int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 	struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
@@ -208,14 +208,16 @@ nla_put_failure:
 
 static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
 			      struct netlink_callback *cb, int type,
-			      const struct tc_action_ops *ops)
+			      const struct tc_action_ops *ops,
+			      struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
+			      struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index fa975262dbac..a406f191cb84 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -84,7 +84,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
 
 static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
-			   int ovr, int bind)
+			   int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 	struct nlattr *tb[TCA_SKBMOD_MAX + 1];
@@ -232,14 +232,16 @@ nla_put_failure:
 
 static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
 			     struct netlink_callback *cb, int type,
-			     const struct tc_action_ops *ops)
+			     const struct tc_action_ops *ops,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 0e23aac09ad6..41ff9d0e5c62 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -70,7 +70,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
-			   int ovr, int bind)
+			   int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 	struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
@@ -291,14 +291,16 @@ nla_put_failure:
 
 static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
 			     struct netlink_callback *cb, int type,
-			     const struct tc_action_ops *ops)
+			     const struct tc_action_ops *ops,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index e1a1b3f3983a..71411a255f04 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -109,7 +109,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind)
+			 int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 	struct nlattr *tb[TCA_VLAN_MAX + 1];
@@ -267,14 +267,16 @@ nla_put_failure:
 
 static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
 			   struct netlink_callback *cb, int type,
-			   const struct tc_action_ops *ops)
+			   const struct tc_action_ops *ops,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
-static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 2bc1bc23d42e..3e14d38e5d42 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -376,17 +376,12 @@ struct tcf_net {
 static unsigned int tcf_net_id;
 
 static int tcf_block_insert(struct tcf_block *block, struct net *net,
-			    u32 block_index, struct netlink_ext_ack *extack)
+			    struct netlink_ext_ack *extack)
 {
 	struct tcf_net *tn = net_generic(net, tcf_net_id);
-	int err;
 
-	err = idr_alloc_u32(&tn->idr, block, &block_index, block_index,
-			    GFP_KERNEL);
-	if (err)
-		return err;
-	block->index = block_index;
-	return 0;
+	return idr_alloc_u32(&tn->idr, block, &block->index, block->index,
+			     GFP_KERNEL);
 }
 
 static void tcf_block_remove(struct tcf_block *block, struct net *net)
@@ -397,6 +392,7 @@ static void tcf_block_remove(struct tcf_block *block, struct net *net)
 }
 
 static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
+					  u32 block_index,
 					  struct netlink_ext_ack *extack)
 {
 	struct tcf_block *block;
@@ -419,10 +415,13 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
 		err = -ENOMEM;
 		goto err_chain_create;
 	}
-	block->net = qdisc_net(q);
 	block->refcnt = 1;
 	block->net = net;
-	block->q = q;
+	block->index = block_index;
+
+	/* Don't store q pointer for blocks which are shared */
+	if (!tcf_block_shared(block))
+		block->q = q;
 	return block;
 
 err_chain_create:
@@ -518,13 +517,12 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
 	}
 
 	if (!block) {
-		block = tcf_block_create(net, q, extack);
+		block = tcf_block_create(net, q, ei->block_index, extack);
 		if (IS_ERR(block))
 			return PTR_ERR(block);
 		created = true;
-		if (ei->block_index) {
-			err = tcf_block_insert(block, net,
-					       ei->block_index, extack);
+		if (tcf_block_shared(block)) {
+			err = tcf_block_insert(block, net, extack);
 			if (err)
 				goto err_block_insert;
 		}
@@ -1434,7 +1432,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 	if (exts->police && tb[exts->police]) {
 		act = tcf_action_init_1(net, tp, tb[exts->police],
 					rate_tlv, "police", ovr,
-					TCA_ACT_BIND);
+					TCA_ACT_BIND, extack);
 		if (IS_ERR(act))
 			return PTR_ERR(act);
 
@@ -1447,7 +1445,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 
 		err = tcf_action_init(net, tp, tb[exts->action],
 				      rate_tlv, NULL, ovr, TCA_ACT_BIND,
-				      &actions);
+				      &actions, extack);
 		if (err)
 			return err;
 		list_for_each_entry(act, &actions, list)
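
The tcf_block_insert() simplification leans on the idr_alloc_u32() contract: the id is passed by reference and serves as both the starting point and, via the max argument, the upper bound, so passing block->index twice either claims exactly that id or fails, and on success the allocated id is written back through the pointer. With block->index now filled in by tcf_block_create(), a sketch of the call reduces to (index value illustrative):

	u32 index = 42;		/* requested shared-block index */
	int err;

	err = idr_alloc_u32(&tn->idr, block, &index, index, GFP_KERNEL);
	if (err)		/* -ENOSPC if that exact id is taken */
		return err;

Note also that only shared blocks (nonzero ei->block_index) are inserted into the per-netns IDR, and that tcf_block_create() deliberately leaves block->q NULL for them; the cls_u32 change below depends on that.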
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 6c7601a530e3..ed8b6a24b9e9 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -96,7 +96,7 @@ struct tc_u_hnode {
 
 struct tc_u_common {
 	struct tc_u_hnode __rcu *hlist;
-	struct tcf_block *block;
+	void *ptr;
 	int refcnt;
 	struct idr handle_idr;
 	struct hlist_node hnode;
@@ -330,9 +330,25 @@ static struct hlist_head *tc_u_common_hash;
 #define U32_HASH_SHIFT 10
 #define U32_HASH_SIZE (1 << U32_HASH_SHIFT)
 
+static void *tc_u_common_ptr(const struct tcf_proto *tp)
+{
+	struct tcf_block *block = tp->chain->block;
+
+	/* The block sharing is currently supported only
+	 * for classless qdiscs. In that case we use block
+	 * for tc_u_common identification. In case the
+	 * block is not shared, block->q is a valid pointer
+	 * and we can use that. That works for classful qdiscs.
+	 */
+	if (tcf_block_shared(block))
+		return block;
+	else
+		return block->q;
+}
+
 static unsigned int tc_u_hash(const struct tcf_proto *tp)
 {
-	return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
+	return hash_ptr(tc_u_common_ptr(tp), U32_HASH_SHIFT);
 }
 
 static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
@@ -342,7 +358,7 @@ static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
 
 	h = tc_u_hash(tp);
 	hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
-		if (tc->block == tp->chain->block)
+		if (tc->ptr == tc_u_common_ptr(tp))
 			return tc;
 	}
 	return NULL;
@@ -371,7 +387,7 @@ static int u32_init(struct tcf_proto *tp)
 		kfree(root_ht);
 		return -ENOBUFS;
 	}
-	tp_c->block = tp->chain->block;
+	tp_c->ptr = tc_u_common_ptr(tp);
 	INIT_HLIST_NODE(&tp_c->hnode);
 	idr_init(&tp_c->handle_idr);
 
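
With block->q left NULL for shared blocks, the two pointers are complementary identities and tc_u_common_ptr() can key the u32 hash on whichever is valid. Condensed, the choice it encodes (same helpers as above):

	/* shared block: classless qdiscs only, the block itself is the
	 * stable identity; unshared block: block->q is valid and also
	 * covers classful qdiscs
	 */
	void *key = tcf_block_shared(block) ? (void *)block
					    : (void *)block->q;
	unsigned int h = hash_ptr(key, U32_HASH_SHIFT);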
diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
new file mode 100644
index 000000000000..a5f34e930eff
--- /dev/null
+++ b/net/sched/em_ipt.c
@@ -0,0 +1,257 @@
+/*
+ * net/sched/em_ipt.c IPtables matches Ematch
+ *
+ * (c) 2018 Eyal Birger <eyal.birger@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_ipt.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/pkt_cls.h>
+
+struct em_ipt_match {
+	const struct xt_match *match;
+	u32 hook;
+	u8 match_data[0] __aligned(8);
+};
+
+struct em_ipt_xt_match {
+	char *match_name;
+	int (*validate_match_data)(struct nlattr **tb, u8 mrev);
+};
+
+static const struct nla_policy em_ipt_policy[TCA_EM_IPT_MAX + 1] = {
+	[TCA_EM_IPT_MATCH_NAME]		= { .type = NLA_STRING,
+					    .len = XT_EXTENSION_MAXNAMELEN },
+	[TCA_EM_IPT_MATCH_REVISION]	= { .type = NLA_U8 },
+	[TCA_EM_IPT_HOOK]		= { .type = NLA_U32 },
+	[TCA_EM_IPT_NFPROTO]		= { .type = NLA_U8 },
+	[TCA_EM_IPT_MATCH_DATA]		= { .type = NLA_UNSPEC },
+};
+
+static int check_match(struct net *net, struct em_ipt_match *im, int mdata_len)
+{
+	struct xt_mtchk_param mtpar = {};
+	union {
+		struct ipt_entry e4;
+		struct ip6t_entry e6;
+	} e = {};
+
+	mtpar.net = net;
+	mtpar.table = "filter";
+	mtpar.hook_mask = 1 << im->hook;
+	mtpar.family = im->match->family;
+	mtpar.match = im->match;
+	mtpar.entryinfo = &e;
+	mtpar.matchinfo = (void *)im->match_data;
+	return xt_check_match(&mtpar, mdata_len, 0, 0);
+}
+
+static int policy_validate_match_data(struct nlattr **tb, u8 mrev)
+{
+	if (mrev != 0) {
+		pr_err("only policy match revision 0 supported");
+		return -EINVAL;
+	}
+
+	if (nla_get_u32(tb[TCA_EM_IPT_HOOK]) != NF_INET_PRE_ROUTING) {
+		pr_err("policy can only be matched on NF_INET_PRE_ROUTING");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
+	{
+		.match_name = "policy",
+		.validate_match_data = policy_validate_match_data
+	},
+	{}
+};
+
+static struct xt_match *get_xt_match(struct nlattr **tb)
+{
+	const struct em_ipt_xt_match *m;
+	struct nlattr *mname_attr;
+	u8 nfproto, mrev = 0;
+	int ret;
+
+	mname_attr = tb[TCA_EM_IPT_MATCH_NAME];
+	for (m = em_ipt_xt_matches; m->match_name; m++) {
+		if (!nla_strcmp(mname_attr, m->match_name))
+			break;
+	}
+
+	if (!m->match_name) {
+		pr_err("Unsupported xt match");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (tb[TCA_EM_IPT_MATCH_REVISION])
+		mrev = nla_get_u8(tb[TCA_EM_IPT_MATCH_REVISION]);
+
+	ret = m->validate_match_data(tb, mrev);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]);
+	return xt_request_find_match(nfproto, m->match_name, mrev);
+}
+
+static int em_ipt_change(struct net *net, void *data, int data_len,
+			 struct tcf_ematch *em)
+{
+	struct nlattr *tb[TCA_EM_IPT_MAX + 1];
+	struct em_ipt_match *im = NULL;
+	struct xt_match *match;
+	int mdata_len, ret;
+
+	ret = nla_parse(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy,
+			NULL);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[TCA_EM_IPT_HOOK] || !tb[TCA_EM_IPT_MATCH_NAME] ||
+	    !tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO])
+		return -EINVAL;
+
+	match = get_xt_match(tb);
+	if (IS_ERR(match)) {
+		pr_err("unable to load match\n");
+		return PTR_ERR(match);
+	}
+
+	mdata_len = XT_ALIGN(nla_len(tb[TCA_EM_IPT_MATCH_DATA]));
+	im = kzalloc(sizeof(*im) + mdata_len, GFP_KERNEL);
+	if (!im) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	im->match = match;
+	im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]);
+	nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len);
+
+	ret = check_match(net, im, mdata_len);
+	if (ret)
+		goto err;
+
+	em->datalen = sizeof(*im) + mdata_len;
+	em->data = (unsigned long)im;
+	return 0;
+
+err:
+	kfree(im);
+	module_put(match->me);
+	return ret;
+}
+
+static void em_ipt_destroy(struct tcf_ematch *em)
+{
+	struct em_ipt_match *im = (void *)em->data;
+
+	if (!im)
+		return;
+
+	if (im->match->destroy) {
+		struct xt_mtdtor_param par = {
+			.net = em->net,
+			.match = im->match,
+			.matchinfo = im->match_data,
+			.family = im->match->family
+		};
+		im->match->destroy(&par);
+	}
+	module_put(im->match->me);
+	kfree((void *)im);
+}
+
+static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
+			struct tcf_pkt_info *info)
+{
+	const struct em_ipt_match *im = (const void *)em->data;
+	struct xt_action_param acpar = {};
+	struct net_device *indev = NULL;
+	struct nf_hook_state state;
+	int ret;
+
+	rcu_read_lock();
+
+	if (skb->skb_iif)
+		indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
+
+	nf_hook_state_init(&state, im->hook, im->match->family,
+			   indev ?: skb->dev, skb->dev, NULL, em->net, NULL);
+
+	acpar.match = im->match;
+	acpar.matchinfo = im->match_data;
+	acpar.state = &state;
+
+	ret = im->match->match(skb, &acpar);
+
+	rcu_read_unlock();
+	return ret;
+}
+
+static int em_ipt_dump(struct sk_buff *skb, struct tcf_ematch *em)
+{
+	struct em_ipt_match *im = (void *)em->data;
+
+	if (nla_put_string(skb, TCA_EM_IPT_MATCH_NAME, im->match->name) < 0)
+		return -EMSGSIZE;
+	if (nla_put_u32(skb, TCA_EM_IPT_HOOK, im->hook) < 0)
+		return -EMSGSIZE;
+	if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0)
+		return -EMSGSIZE;
+	if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->match->family) < 0)
+		return -EMSGSIZE;
+	if (nla_put(skb, TCA_EM_IPT_MATCH_DATA,
+		    im->match->usersize ?: im->match->matchsize,
+		    im->match_data) < 0)
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static struct tcf_ematch_ops em_ipt_ops = {
+	.kind = TCF_EM_IPT,
+	.change = em_ipt_change,
+	.destroy = em_ipt_destroy,
+	.match = em_ipt_match,
+	.dump = em_ipt_dump,
+	.owner = THIS_MODULE,
+	.link = LIST_HEAD_INIT(em_ipt_ops.link)
+};
+
+static int __init init_em_ipt(void)
+{
+	return tcf_em_register(&em_ipt_ops);
+}
+
+static void __exit exit_em_ipt(void)
+{
+	tcf_em_unregister(&em_ipt_ops);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eyal Birger <eyal.birger@gmail.com>");
+MODULE_DESCRIPTION("TC extended match for IPtables matches");
+
+module_init(init_em_ipt);
+module_exit(exit_em_ipt);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPT);
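
The new ematch follows the standard module shape: .change parses and validates the netlink blob, .match is the per-packet path (here synthesizing an nf_hook_state so the xtables match runs as if on its configured hook), .destroy drops the xt_match reference, and .dump serializes the config back, writing usersize rather than matchsize bytes so kernel-private tail fields are not leaked. A hedged skeleton of that shape, with TCF_EM_EXAMPLE and every em_example_* identifier as placeholders rather than real kernel symbols:

	static struct tcf_ematch_ops em_example_ops = {
		.kind    = TCF_EM_EXAMPLE,	/* placeholder ematch kind */
		.change  = em_example_change,	/* parse/validate config */
		.match   = em_example_match,	/* per-packet verdict */
		.destroy = em_example_destroy,
		.dump    = em_example_dump,
		.owner   = THIS_MODULE,
		.link    = LIST_HEAD_INIT(em_example_ops.link),
	};

	/* paired with tcf_em_register()/tcf_em_unregister() in module
	 * init/exit, exactly as em_ipt does above
	 */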
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d512f49ee83c..27e672c12492 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -2128,6 +2128,7 @@ static void __net_exit psched_net_exit(struct net *net)
 static struct pernet_operations psched_net_ops = {
 	.init = psched_net_init,
 	.exit = psched_net_exit,
+	.async = true,
 };
 
 static int __init pktsched_init(void)
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 6776582ec449..e845e4588535 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -15,6 +15,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
 	   offload.o stream_sched.o stream_sched_prio.o \
 	   stream_sched_rr.o stream_interleave.o
 
+sctp_diag-y := diag.o
+
 sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
 sctp-$(CONFIG_PROC_FS) += proc.o
 sctp-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 291c97b07058..8f6c2e8c0953 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -81,6 +81,12 @@ const char *sctp_cname(const union sctp_subtype cid)
 	case SCTP_CID_RECONF:
 		return "RECONF";
 
+	case SCTP_CID_I_DATA:
+		return "I_DATA";
+
+	case SCTP_CID_I_FWD_TSN:
+		return "I_FWD_TSN";
+
 	default:
 		break;
 	}
diff --git a/net/sctp/sctp_diag.c b/net/sctp/diag.c
index a72a7d925d46..078f01a8d582 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/diag.c
@@ -1,3 +1,34 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions implement sctp diag support.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresched(es):
+ *    lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ *    Xin Long <lucien.xin@gmail.com>
+ */
+
 #include <linux/module.h>
 #include <linux/inet_diag.h>
 #include <linux/sock_diag.h>
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 141c9c466ec1..0247cc432e02 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -897,15 +897,12 @@ int sctp_hash_transport(struct sctp_transport *t)
 	rhl_for_each_entry_rcu(transport, tmp, list, node)
 		if (transport->asoc->ep == t->asoc->ep) {
 			rcu_read_unlock();
-			err = -EEXIST;
-			goto out;
+			return -EEXIST;
 		}
 	rcu_read_unlock();
 
 	err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
 				  &t->node, sctp_hash_params);
-
-out:
 	if (err)
 		pr_err_once("insert transport fail, errno %d\n", err);
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e35d4f73d2df..0d873c58e516 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -952,16 +952,16 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
 
 /* Handle SCTP_I_WANT_MAPPED_V4_ADDR for getpeername() and getsockname() */
 static int sctp_getname(struct socket *sock, struct sockaddr *uaddr,
-			int *uaddr_len, int peer)
+			int peer)
 {
 	int rc;
 
-	rc = inet6_getname(sock, uaddr, uaddr_len, peer);
+	rc = inet6_getname(sock, uaddr, peer);
 
-	if (rc != 0)
+	if (rc < 0)
 		return rc;
 
-	*uaddr_len = sctp_v6_addr_to_user(sctp_sk(sock->sk),
-					  (union sctp_addr *)uaddr);
+	rc = sctp_v6_addr_to_user(sctp_sk(sock->sk),
+				  (union sctp_addr *)uaddr);
 
 	return rc;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index cedf672487f9..f799043abec9 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -6,7 +6,7 @@
  *
  * This file is part of the SCTP kernel implementation
  *
- * These functions manipulate sctp tsn mapping array.
+ * This file contains sctp stream maniuplation primitives and helpers.
  *
  * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
index 8c7cf8f08711..d3764c181299 100644
--- a/net/sctp/stream_interleave.c
+++ b/net/sctp/stream_interleave.c
@@ -3,7 +3,8 @@
  *
  * This file is part of the SCTP kernel implementation
  *
- * These functions manipulate sctp stream queue/scheduling.
+ * These functions implement sctp stream message interleaving, mostly
+ * including I-DATA and I-FORWARD-TSN chunks process.
  *
  * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
@@ -954,12 +955,8 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 	__u32 freed = 0;
 	__u16 needed;
 
-	if (chunk) {
-		needed = ntohs(chunk->chunk_hdr->length);
-		needed -= sizeof(struct sctp_idata_chunk);
-	} else {
-		needed = SCTP_DEFAULT_MAXWINDOW;
-	}
+	needed = ntohs(chunk->chunk_hdr->length) -
+		 sizeof(struct sctp_idata_chunk);
 
 	if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
 		freed = sctp_ulpq_renege_list(ulpq, &ulpq->lobby, needed);
@@ -971,9 +968,8 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 						       needed);
 	}
 
-	if (chunk && freed >= needed)
-		if (sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
-			sctp_intl_start_pd(ulpq, gfp);
+	if (freed >= needed && sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
+		sctp_intl_start_pd(ulpq, gfp);
 
 	sk_mem_reclaim(asoc->base.sk);
 }
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index da1a5cdefd13..38ae22b65e77 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -281,7 +281,6 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
 	struct in_device *in_dev;
 	struct sockaddr_in addr;
 	int rc = -ENOENT;
-	int len;
 
 	if (!dst) {
 		rc = -ENOTCONN;
@@ -293,7 +292,7 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
 	}
 
 	/* get address to which the internal TCP socket is bound */
-	kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
+	kernel_getsockname(clcsock, (struct sockaddr *)&addr);
 	/* analyze IPv4 specific data of net_device belonging to TCP socket */
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dst->dev);
@@ -771,7 +770,7 @@ static void smc_listen_work(struct work_struct *work)
 	u8 buf[SMC_CLC_MAX_LEN];
 	struct smc_link *link;
 	int reason_code = 0;
-	int rc = 0, len;
+	int rc = 0;
 	__be32 subnet;
 	u8 prefix_len;
 	u8 ibport;
@@ -824,7 +823,7 @@ static void smc_listen_work(struct work_struct *work)
 	}
 
 	/* get address of the peer connected to the internal TCP socket */
-	kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);
+	kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr);
 
 	/* allocate connection / link group */
 	mutex_lock(&smc_create_lgr_pending);
@@ -1075,7 +1074,7 @@ out:
 }
 
 static int smc_getname(struct socket *sock, struct sockaddr *addr,
-		       int *len, int peer)
+		       int peer)
 {
 	struct smc_sock *smc;
 
@@ -1085,7 +1084,7 @@ static int smc_getname(struct socket *sock, struct sockaddr *addr,
 
 	smc = smc_sk(sock->sk);
 
-	return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
+	return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
 }
 
 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
diff --git a/net/socket.c b/net/socket.c
index a93c99b518ca..ab58e57c09ca 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -104,7 +104,6 @@
 #include <linux/ipv6_route.h>
 #include <linux/route.h>
 #include <linux/sockios.h>
-#include <linux/atalk.h>
 #include <net/busy_poll.h>
 #include <linux/errqueue.h>
 
@@ -991,10 +990,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
  * what to do with it - that's up to the protocol still.
  */
 
-static struct ns_common *get_net_ns(struct ns_common *ns)
+struct ns_common *get_net_ns(struct ns_common *ns)
 {
 	return &get_net(container_of(ns, struct net, ns))->ns;
 }
+EXPORT_SYMBOL_GPL(get_net_ns);
 
 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
@@ -1573,8 +1573,9 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
 		goto out_fd;
 
 	if (upeer_sockaddr) {
-		if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
-					  &len, 2) < 0) {
+		len = newsock->ops->getname(newsock,
+					(struct sockaddr *)&address, 2);
+		if (len < 0) {
 			err = -ECONNABORTED;
 			goto out_fd;
 		}
@@ -1654,7 +1655,7 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
 {
 	struct socket *sock;
 	struct sockaddr_storage address;
-	int len, err, fput_needed;
+	int err, fput_needed;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
@@ -1664,10 +1665,11 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
 	if (err)
 		goto out_put;
 
-	err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
-	if (err)
+	err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
+	if (err < 0)
 		goto out_put;
-	err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
+	/* "err" is actually length in this case */
+	err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
 
 out_put:
 	fput_light(sock->file, fput_needed);
@@ -1685,7 +1687,7 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
 {
 	struct socket *sock;
 	struct sockaddr_storage address;
-	int len, err, fput_needed;
+	int err, fput_needed;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (sock != NULL) {
@@ -1695,11 +1697,10 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
 			return err;
 		}
 
-		err =
-		    sock->ops->getname(sock, (struct sockaddr *)&address, &len,
-				       1);
-		if (!err)
-			err = move_addr_to_user(&address, len, usockaddr,
+		err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
+		if (err >= 0)
+			/* "err" is actually length in this case */
+			err = move_addr_to_user(&address, err, usockaddr,
 						usockaddr_len);
 		fput_light(sock->file, fput_needed);
 	}
@@ -3166,17 +3167,15 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
 }
 EXPORT_SYMBOL(kernel_connect);
 
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
-		       int *addrlen)
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
 {
-	return sock->ops->getname(sock, addr, addrlen, 0);
+	return sock->ops->getname(sock, addr, 0);
 }
 EXPORT_SYMBOL(kernel_getsockname);
 
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
-		       int *addrlen)
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
 {
-	return sock->ops->getname(sock, addr, addrlen, 1);
+	return sock->ops->getname(sock, addr, 1);
 }
 EXPORT_SYMBOL(kernel_getpeername);
 
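This is the core of the getname API change that the surrounding hunks adapt to: proto_ops->getname() and the kernel_getsockname()/kernel_getpeername() wrappers no longer fill an int * length out-parameter but return the address length as a non-negative value, with a negative errno still signalling failure. A minimal caller sketch under the new contract ("sock" assumed to be a valid struct socket):

	struct sockaddr_storage addr;
	int len;

	len = kernel_getsockname(sock, (struct sockaddr *)&addr);
	if (len < 0)
		return len;	/* negative errno */
	/* len now holds what was previously returned via *addrlen */

The sunrpc hunks that follow apply the same conversion: drop the local length variable, then treat the return value as the length.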
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 6e432ecd7f99..806395687bb6 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1231,7 +1231,7 @@ static const struct sockaddr_in6 rpc_in6addr_loopback = {
  * negative errno is returned.
  */
 static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
-			struct sockaddr *buf, int buflen)
+			struct sockaddr *buf)
 {
 	struct socket *sock;
 	int err;
@@ -1269,7 +1269,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
 		goto out_release;
 	}
 
-	err = kernel_getsockname(sock, buf, &buflen);
+	err = kernel_getsockname(sock, buf);
 	if (err < 0) {
 		dprintk("RPC: getsockname failed (%d)\n", err);
 		goto out_release;
@@ -1353,7 +1353,7 @@ int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen)
 	rcu_read_unlock();
 
 	rpc_set_port(sap, 0);
-	err = rpc_sockname(net, sap, salen, buf, buflen);
+	err = rpc_sockname(net, sap, salen, buf);
 	put_net(net);
 	if (err != 0)
 		/* Couldn't discover local address, return ANYADDR */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 943f2a745cd5..08cd951aaeea 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -832,12 +832,13 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	}
 	set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 
-	err = kernel_getpeername(newsock, sin, &slen);
+	err = kernel_getpeername(newsock, sin);
 	if (err < 0) {
 		net_warn_ratelimited("%s: peername failed (err %d)!\n",
 				     serv->sv_name, -err);
 		goto failed;		/* aborted connection or whatever */
 	}
+	slen = err;
 
 	/* Ideally, we would want to reject connections from unauthorized
 	 * hosts here, but when we get encryption, the IP of the host won't
@@ -866,7 +867,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
866 if (IS_ERR(newsvsk)) 867 if (IS_ERR(newsvsk))
867 goto failed; 868 goto failed;
868 svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); 869 svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
869 err = kernel_getsockname(newsock, sin, &slen); 870 err = kernel_getsockname(newsock, sin);
871 slen = err;
870 if (unlikely(err < 0)) { 872 if (unlikely(err < 0)) {
871 dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); 873 dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
872 slen = offsetof(struct sockaddr, sa_data); 874 slen = offsetof(struct sockaddr, sa_data);
@@ -1465,7 +1467,8 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
1465 err = PTR_ERR(svsk); 1467 err = PTR_ERR(svsk);
1466 goto out; 1468 goto out;
1467 } 1469 }
1468 if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0) 1470 salen = kernel_getsockname(svsk->sk_sock, sin);
1471 if (salen >= 0)
1469 svc_xprt_set_local(&svsk->sk_xprt, sin, salen); 1472 svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
1470 svc_add_new_perm_xprt(serv, &svsk->sk_xprt); 1473 svc_add_new_perm_xprt(serv, &svsk->sk_xprt);
1471 return svc_one_sock_name(svsk, name_return, len); 1474 return svc_one_sock_name(svsk, name_return, len);
@@ -1539,10 +1542,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1539 if (error < 0) 1542 if (error < 0)
1540 goto bummer; 1543 goto bummer;
1541 1544
1542 newlen = len; 1545 error = kernel_getsockname(sock, newsin);
1543 error = kernel_getsockname(sock, newsin, &newlen);
1544 if (error < 0) 1546 if (error < 0)
1545 goto bummer; 1547 goto bummer;
1548 newlen = error;
1546 1549
1547 if (protocol == IPPROTO_TCP) { 1550 if (protocol == IPPROTO_TCP) {
1548 if ((error = kernel_listen(sock, 64)) < 0) 1551 if ((error = kernel_listen(sock, 64)) < 0)
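
One subtlety in svc_tcp_accept() above: the returned length is saved into slen before the error check, and on failure it is clamped to offsetof(struct sockaddr, sa_data) so the transport still records a minimally valid, family-only address header. A condensed sketch of that idiom, assuming the real svc_xprt_set_local() helper:

	struct sockaddr_storage addr;
	int slen;

	slen = kernel_getsockname(newsock, (struct sockaddr *)&addr);
	if (unlikely(slen < 0))
		/* fall back to a bare, family-only address header */
		slen = offsetof(struct sockaddr, sa_data);
	svc_xprt_set_local(&svsk->sk_xprt, (struct sockaddr *)&addr, slen);
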
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a6b8c1f8f92a..956e29c1438d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1794,10 +1794,9 @@ static void xs_sock_set_reuseport(struct socket *sock)
1794static unsigned short xs_sock_getport(struct socket *sock) 1794static unsigned short xs_sock_getport(struct socket *sock)
1795{ 1795{
1796 struct sockaddr_storage buf; 1796 struct sockaddr_storage buf;
1797 int buflen;
1798 unsigned short port = 0; 1797 unsigned short port = 0;
1799 1798
1800 if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0) 1799 if (kernel_getsockname(sock, (struct sockaddr *)&buf) < 0)
1801 goto out; 1800 goto out;
1802 switch (buf.ss_family) { 1801 switch (buf.ss_family) {
1803 case AF_INET6: 1802 case AF_INET6:
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 9aed6fe1bf1a..f424539829b7 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -89,6 +89,7 @@ static void __net_exit sysctl_net_exit(struct net *net)
89static struct pernet_operations sysctl_pernet_ops = { 89static struct pernet_operations sysctl_pernet_ops = {
90 .init = sysctl_net_init, 90 .init = sysctl_net_init,
91 .exit = sysctl_net_exit, 91 .exit = sysctl_net_exit,
92 .async = true,
92}; 93};
93 94
94static struct ctl_table_header *net_header; 95static struct ctl_table_header *net_header;
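
The one-line .async = true addition marks sysctl's pernet operations as safe to run without serialising against other namespaces' setup and teardown, part of this cycle's effort to parallelise network-namespace creation. The general shape of an async-marked registration (the ops struct and flag are real for this kernel version; the init/exit bodies are placeholders):

	static int __net_init example_net_init(struct net *net)
	{
		return 0;		/* per-namespace setup */
	}

	static void __net_exit example_net_exit(struct net *net)
	{
		/* per-namespace teardown */
	}

	static struct pernet_operations example_pernet_ops = {
		.init  = example_net_init,
		.exit  = example_net_exit,
		.async = true,	/* does not depend on the global net_mutex */
	};
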
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 37bb0bfbd936..1edb7192aa2f 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -9,7 +9,7 @@ tipc-y += addr.o bcast.o bearer.o \
9 core.o link.o discover.o msg.o \ 9 core.o link.o discover.o msg.o \
10 name_distr.o subscr.o monitor.o name_table.o net.o \ 10 name_distr.o subscr.o monitor.o name_table.o net.o \
11 netlink.o netlink_compat.o node.o socket.o eth_media.o \ 11 netlink.o netlink_compat.o node.o socket.o eth_media.o \
12 server.o socket.o group.o 12 topsrv.o socket.o group.o
13 13
14tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o 14tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
15tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o 15tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index c8001471da6c..f3d2e83313e1 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -813,7 +813,7 @@ err_out:
813 return err; 813 return err;
814} 814}
815 815
816int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) 816int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
817{ 817{
818 int err; 818 int err;
819 char *name; 819 char *name;
@@ -835,20 +835,27 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
835 835
836 name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); 836 name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
837 837
838 rtnl_lock();
839 bearer = tipc_bearer_find(net, name); 838 bearer = tipc_bearer_find(net, name);
840 if (!bearer) { 839 if (!bearer)
841 rtnl_unlock();
842 return -EINVAL; 840 return -EINVAL;
843 }
844 841
845 bearer_disable(net, bearer); 842 bearer_disable(net, bearer);
846 rtnl_unlock();
847 843
848 return 0; 844 return 0;
849} 845}
850 846
851int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) 847int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
848{
849 int err;
850
851 rtnl_lock();
852 err = __tipc_nl_bearer_disable(skb, info);
853 rtnl_unlock();
854
855 return err;
856}
857
858int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
852{ 859{
853 int err; 860 int err;
854 char *bearer; 861 char *bearer;
@@ -890,15 +897,18 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
890 prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); 897 prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
891 } 898 }
892 899
900 return tipc_enable_bearer(net, bearer, domain, prio, attrs);
901}
902
903int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
904{
905 int err;
906
893 rtnl_lock(); 907 rtnl_lock();
894 err = tipc_enable_bearer(net, bearer, domain, prio, attrs); 908 err = __tipc_nl_bearer_enable(skb, info);
895 if (err) {
896 rtnl_unlock();
897 return err;
898 }
899 rtnl_unlock(); 909 rtnl_unlock();
900 910
901 return 0; 911 return err;
902} 912}
903 913
904int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) 914int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
@@ -944,13 +954,13 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
944 return 0; 954 return 0;
945} 955}
946 956
947int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) 957int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
948{ 958{
949 int err;
950 char *name;
951 struct tipc_bearer *b; 959 struct tipc_bearer *b;
952 struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; 960 struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
953 struct net *net = sock_net(skb->sk); 961 struct net *net = sock_net(skb->sk);
962 char *name;
963 int err;
954 964
955 if (!info->attrs[TIPC_NLA_BEARER]) 965 if (!info->attrs[TIPC_NLA_BEARER])
956 return -EINVAL; 966 return -EINVAL;
@@ -965,35 +975,42 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
965 return -EINVAL; 975 return -EINVAL;
966 name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); 976 name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
967 977
968 rtnl_lock();
969 b = tipc_bearer_find(net, name); 978 b = tipc_bearer_find(net, name);
970 if (!b) { 979 if (!b)
971 rtnl_unlock();
972 return -EINVAL; 980 return -EINVAL;
973 }
974 981
975 if (attrs[TIPC_NLA_BEARER_PROP]) { 982 if (attrs[TIPC_NLA_BEARER_PROP]) {
976 struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; 983 struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
977 984
978 err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], 985 err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP],
979 props); 986 props);
980 if (err) { 987 if (err)
981 rtnl_unlock();
982 return err; 988 return err;
983 }
984 989
985 if (props[TIPC_NLA_PROP_TOL]) 990 if (props[TIPC_NLA_PROP_TOL]) {
986 b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); 991 b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
992 tipc_node_apply_tolerance(net, b);
993 }
987 if (props[TIPC_NLA_PROP_PRIO]) 994 if (props[TIPC_NLA_PROP_PRIO])
988 b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); 995 b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
989 if (props[TIPC_NLA_PROP_WIN]) 996 if (props[TIPC_NLA_PROP_WIN])
990 b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); 997 b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
991 } 998 }
992 rtnl_unlock();
993 999
994 return 0; 1000 return 0;
995} 1001}
996 1002
1003int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
1004{
1005 int err;
1006
1007 rtnl_lock();
1008 err = __tipc_nl_bearer_set(skb, info);
1009 rtnl_unlock();
1010
1011 return err;
1012}
1013
997static int __tipc_nl_add_media(struct tipc_nl_msg *msg, 1014static int __tipc_nl_add_media(struct tipc_nl_msg *msg,
998 struct tipc_media *media, int nlflags) 1015 struct tipc_media *media, int nlflags)
999{ 1016{
@@ -1115,7 +1132,7 @@ err_out:
1115 return err; 1132 return err;
1116} 1133}
1117 1134
1118int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) 1135int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
1119{ 1136{
1120 int err; 1137 int err;
1121 char *name; 1138 char *name;
@@ -1133,22 +1150,17 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
1133 return -EINVAL; 1150 return -EINVAL;
1134 name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); 1151 name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]);
1135 1152
1136 rtnl_lock();
1137 m = tipc_media_find(name); 1153 m = tipc_media_find(name);
1138 if (!m) { 1154 if (!m)
1139 rtnl_unlock();
1140 return -EINVAL; 1155 return -EINVAL;
1141 }
1142 1156
1143 if (attrs[TIPC_NLA_MEDIA_PROP]) { 1157 if (attrs[TIPC_NLA_MEDIA_PROP]) {
1144 struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; 1158 struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
1145 1159
1146 err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP], 1160 err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP],
1147 props); 1161 props);
1148 if (err) { 1162 if (err)
1149 rtnl_unlock();
1150 return err; 1163 return err;
1151 }
1152 1164
1153 if (props[TIPC_NLA_PROP_TOL]) 1165 if (props[TIPC_NLA_PROP_TOL])
1154 m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); 1166 m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
@@ -1157,7 +1169,17 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
1157 if (props[TIPC_NLA_PROP_WIN]) 1169 if (props[TIPC_NLA_PROP_WIN])
1158 m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); 1170 m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
1159 } 1171 }
1160 rtnl_unlock();
1161 1172
1162 return 0; 1173 return 0;
1163} 1174}
1175
1176int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
1177{
1178 int err;
1179
1180 rtnl_lock();
1181 err = __tipc_nl_media_set(skb, info);
1182 rtnl_unlock();
1183
1184 return err;
1185}
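
The pattern applied three times in this file splits each netlink doit handler into an unlocked __tipc_nl_* core and a thin public wrapper that takes rtnl_lock around it. Callers that already hold the lock — notably the compat layer converted later in this patch — can then invoke the core directly without deadlocking. Schematically, with hypothetical names:

	/* Unlocked core: the caller must hold rtnl_lock. */
	int __example_nl_doit(struct sk_buff *skb, struct genl_info *info)
	{
		/* ... validate attributes and mutate state ... */
		return 0;
	}

	/* Public genl entry point: wraps the core in the lock. */
	int example_nl_doit(struct sk_buff *skb, struct genl_info *info)
	{
		int err;

		rtnl_lock();
		err = __example_nl_doit(skb, info);
		rtnl_unlock();

		return err;
	}
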
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 42d6eeeb646d..a53613d95bc9 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -188,15 +188,19 @@ extern struct tipc_media udp_media_info;
188#endif 188#endif
189 189
190int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); 190int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info);
191int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info);
191int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); 192int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
193int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
192int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); 194int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb);
193int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); 195int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info);
194int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); 196int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
197int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
195int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info); 198int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info);
196 199
197int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); 200int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb);
198int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); 201int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
199int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); 202int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
203int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
200 204
201int tipc_media_set_priority(const char *name, u32 new_value); 205int tipc_media_set_priority(const char *name, u32 new_value);
202int tipc_media_set_window(const char *name, u32 new_value); 206int tipc_media_set_window(const char *name, u32 new_value);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 20b21af2ff14..ff8b071654f5 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -64,7 +64,7 @@ struct tipc_bearer;
64struct tipc_bc_base; 64struct tipc_bc_base;
65struct tipc_link; 65struct tipc_link;
66struct tipc_name_table; 66struct tipc_name_table;
67struct tipc_server; 67struct tipc_topsrv;
68struct tipc_monitor; 68struct tipc_monitor;
69 69
70#define TIPC_MOD_VER "2.0.0" 70#define TIPC_MOD_VER "2.0.0"
@@ -112,7 +112,7 @@ struct tipc_net {
112 struct list_head dist_queue; 112 struct list_head dist_queue;
113 113
114 /* Topology subscription server */ 114 /* Topology subscription server */
115 struct tipc_server *topsrv; 115 struct tipc_topsrv *topsrv;
116 atomic_t subscription_count; 116 atomic_t subscription_count;
117}; 117};
118 118
@@ -131,7 +131,7 @@ static inline struct list_head *tipc_nodes(struct net *net)
131 return &tipc_net(net)->node_list; 131 return &tipc_net(net)->node_list;
132} 132}
133 133
134static inline struct tipc_server *tipc_topsrv(struct net *net) 134static inline struct tipc_topsrv *tipc_topsrv(struct net *net)
135{ 135{
136 return tipc_net(net)->topsrv; 136 return tipc_net(net)->topsrv;
137} 137}
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 122162a31816..03086ccb7746 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -37,7 +37,7 @@
37#include "addr.h" 37#include "addr.h"
38#include "group.h" 38#include "group.h"
39#include "bcast.h" 39#include "bcast.h"
40#include "server.h" 40#include "topsrv.h"
41#include "msg.h" 41#include "msg.h"
42#include "socket.h" 42#include "socket.h"
43#include "node.h" 43#include "node.h"
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 2d6b2aed30e0..3c230466804d 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2126,7 +2126,8 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
2126 struct sk_buff_head *xmitq) 2126 struct sk_buff_head *xmitq)
2127{ 2127{
2128 l->tolerance = tol; 2128 l->tolerance = tol;
2129 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); 2129 if (link_is_up(l))
2130 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
2130} 2131}
2131 2132
2132void tipc_link_set_prio(struct tipc_link *l, u32 prio, 2133void tipc_link_set_prio(struct tipc_link *l, u32 prio,
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ed0457cc99d6..e01c9c691ba2 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -326,10 +326,10 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
326 326
327 /* Any subscriptions waiting for notification? */ 327 /* Any subscriptions waiting for notification? */
328 list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { 328 list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
329 tipc_subscrp_report_overlap(s, publ->lower, publ->upper, 329 tipc_sub_report_overlap(s, publ->lower, publ->upper,
330 TIPC_PUBLISHED, publ->ref, 330 TIPC_PUBLISHED, publ->ref,
331 publ->node, publ->scope, 331 publ->node, publ->scope,
332 created_subseq); 332 created_subseq);
333 } 333 }
334 return publ; 334 return publ;
335} 335}
@@ -397,10 +397,9 @@ found:
397 397
398 /* Notify any waiting subscriptions */ 398 /* Notify any waiting subscriptions */
399 list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { 399 list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
400 tipc_subscrp_report_overlap(s, publ->lower, publ->upper, 400 tipc_sub_report_overlap(s, publ->lower, publ->upper,
401 TIPC_WITHDRAWN, publ->ref, 401 TIPC_WITHDRAWN, publ->ref, publ->node,
402 publ->node, publ->scope, 402 publ->scope, removed_subseq);
403 removed_subseq);
404 } 403 }
405 404
406 return publ; 405 return publ;
@@ -412,33 +411,37 @@ found:
412 * sequence overlapping with the requested sequence 411 * sequence overlapping with the requested sequence
413 */ 412 */
414static void tipc_nameseq_subscribe(struct name_seq *nseq, 413static void tipc_nameseq_subscribe(struct name_seq *nseq,
415 struct tipc_subscription *s, 414 struct tipc_subscription *sub)
416 bool status)
417{ 415{
418 struct sub_seq *sseq = nseq->sseqs; 416 struct sub_seq *sseq = nseq->sseqs;
419 struct tipc_name_seq ns; 417 struct tipc_name_seq ns;
418 struct tipc_subscr *s = &sub->evt.s;
419 bool no_status;
420 420
421 tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); 421 ns.type = tipc_sub_read(s, seq.type);
422 ns.lower = tipc_sub_read(s, seq.lower);
423 ns.upper = tipc_sub_read(s, seq.upper);
424 no_status = tipc_sub_read(s, filter) & TIPC_SUB_NO_STATUS;
422 425
423 tipc_subscrp_get(s); 426 tipc_sub_get(sub);
424 list_add(&s->nameseq_list, &nseq->subscriptions); 427 list_add(&sub->nameseq_list, &nseq->subscriptions);
425 428
426 if (!status || !sseq) 429 if (no_status || !sseq)
427 return; 430 return;
428 431
429 while (sseq != &nseq->sseqs[nseq->first_free]) { 432 while (sseq != &nseq->sseqs[nseq->first_free]) {
430 if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) { 433 if (tipc_sub_check_overlap(&ns, sseq->lower, sseq->upper)) {
431 struct publication *crs; 434 struct publication *crs;
432 struct name_info *info = sseq->info; 435 struct name_info *info = sseq->info;
433 int must_report = 1; 436 int must_report = 1;
434 437
435 list_for_each_entry(crs, &info->zone_list, zone_list) { 438 list_for_each_entry(crs, &info->zone_list, zone_list) {
436 tipc_subscrp_report_overlap(s, sseq->lower, 439 tipc_sub_report_overlap(sub, sseq->lower,
437 sseq->upper, 440 sseq->upper,
438 TIPC_PUBLISHED, 441 TIPC_PUBLISHED,
439 crs->ref, crs->node, 442 crs->ref, crs->node,
440 crs->scope, 443 crs->scope,
441 must_report); 444 must_report);
442 must_report = 0; 445 must_report = 0;
443 } 446 }
444 } 447 }
@@ -808,24 +811,27 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
808/** 811/**
809 * tipc_nametbl_subscribe - add a subscription object to the name table 812 * tipc_nametbl_subscribe - add a subscription object to the name table
810 */ 813 */
811void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status) 814void tipc_nametbl_subscribe(struct tipc_subscription *sub)
812{ 815{
813 struct tipc_net *tn = net_generic(s->net, tipc_net_id); 816 struct tipc_net *tn = tipc_net(sub->net);
814 u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap); 817 struct tipc_subscr *s = &sub->evt.s;
818 u32 type = tipc_sub_read(s, seq.type);
815 int index = hash(type); 819 int index = hash(type);
816 struct name_seq *seq; 820 struct name_seq *seq;
817 struct tipc_name_seq ns; 821 struct tipc_name_seq ns;
818 822
819 spin_lock_bh(&tn->nametbl_lock); 823 spin_lock_bh(&tn->nametbl_lock);
820 seq = nametbl_find_seq(s->net, type); 824 seq = nametbl_find_seq(sub->net, type);
821 if (!seq) 825 if (!seq)
822 seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); 826 seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
823 if (seq) { 827 if (seq) {
824 spin_lock_bh(&seq->lock); 828 spin_lock_bh(&seq->lock);
825 tipc_nameseq_subscribe(seq, s, status); 829 tipc_nameseq_subscribe(seq, sub);
826 spin_unlock_bh(&seq->lock); 830 spin_unlock_bh(&seq->lock);
827 } else { 831 } else {
828 tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); 832 ns.type = tipc_sub_read(s, seq.type);
833 ns.lower = tipc_sub_read(s, seq.lower);
834 ns.upper = tipc_sub_read(s, seq.upper);
829 pr_warn("Failed to create subscription for {%u,%u,%u}\n", 835 pr_warn("Failed to create subscription for {%u,%u,%u}\n",
830 ns.type, ns.lower, ns.upper); 836 ns.type, ns.lower, ns.upper);
831 } 837 }
@@ -835,18 +841,19 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
835/** 841/**
836 * tipc_nametbl_unsubscribe - remove a subscription object from name table 842 * tipc_nametbl_unsubscribe - remove a subscription object from name table
837 */ 843 */
838void tipc_nametbl_unsubscribe(struct tipc_subscription *s) 844void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
839{ 845{
840 struct tipc_net *tn = net_generic(s->net, tipc_net_id); 846 struct tipc_subscr *s = &sub->evt.s;
847 struct tipc_net *tn = tipc_net(sub->net);
841 struct name_seq *seq; 848 struct name_seq *seq;
842 u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap); 849 u32 type = tipc_sub_read(s, seq.type);
843 850
844 spin_lock_bh(&tn->nametbl_lock); 851 spin_lock_bh(&tn->nametbl_lock);
845 seq = nametbl_find_seq(s->net, type); 852 seq = nametbl_find_seq(sub->net, type);
846 if (seq != NULL) { 853 if (seq != NULL) {
847 spin_lock_bh(&seq->lock); 854 spin_lock_bh(&seq->lock);
848 list_del_init(&s->nameseq_list); 855 list_del_init(&sub->nameseq_list);
849 tipc_subscrp_put(s); 856 tipc_sub_put(sub);
850 if (!seq->first_free && list_empty(&seq->subscriptions)) { 857 if (!seq->first_free && list_empty(&seq->subscriptions)) {
851 hlist_del_init_rcu(&seq->ns_list); 858 hlist_del_init_rcu(&seq->ns_list);
852 kfree(seq->sseqs); 859 kfree(seq->sseqs);
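
The conversions above retire the per-subscription swap flag and the htohl() helper in favour of tipc_sub_read(), which decides at each read whether a field of the subscriber-supplied struct tipc_subscr needs byte-swapping. The in-tree accessor is a macro over the subscription's fields; a simplified stand-alone sketch of the idea, assuming 'swap' has already been derived from the request:

	#include <linux/swab.h>

	/* Read one u32 field of a request that arrived in the
	 * subscriber's native byte order, which may differ from ours.
	 */
	static inline u32 example_sub_read(u32 raw, bool swap)
	{
		return swap ? swab32(raw) : raw;
	}

	/* Usage, mirroring the ns.{type,lower,upper} reads above: */
	/*   ns.lower = example_sub_read(s->seq.lower, swap);      */

Centralising the swap decision removes the need to carry sub->swap through every call chain, which is what lets the tipc_subscrp_convert_seq() helpers go away.
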
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index f56e7cb3d436..17652602d5e2 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -120,7 +120,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
120struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, 120struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
121 u32 lower, u32 node, u32 ref, 121 u32 lower, u32 node, u32 ref,
122 u32 key); 122 u32 key);
123void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status); 123void tipc_nametbl_subscribe(struct tipc_subscription *s);
124void tipc_nametbl_unsubscribe(struct tipc_subscription *s); 124void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
125int tipc_nametbl_init(struct net *net); 125int tipc_nametbl_init(struct net *net);
126void tipc_nametbl_stop(struct net *net); 126void tipc_nametbl_stop(struct net *net);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 719c5924b638..1a2fde0d6f61 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -200,7 +200,7 @@ out:
200 return skb->len; 200 return skb->len;
201} 201}
202 202
203int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) 203int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
204{ 204{
205 struct net *net = sock_net(skb->sk); 205 struct net *net = sock_net(skb->sk);
206 struct tipc_net *tn = net_generic(net, tipc_net_id); 206 struct tipc_net *tn = net_generic(net, tipc_net_id);
@@ -241,10 +241,19 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
241 if (!tipc_addr_node_valid(addr)) 241 if (!tipc_addr_node_valid(addr))
242 return -EINVAL; 242 return -EINVAL;
243 243
244 rtnl_lock();
245 tipc_net_start(net, addr); 244 tipc_net_start(net, addr);
246 rtnl_unlock();
247 } 245 }
248 246
249 return 0; 247 return 0;
250} 248}
249
250int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
251{
252 int err;
253
254 rtnl_lock();
255 err = __tipc_nl_net_set(skb, info);
256 rtnl_unlock();
257
258 return err;
259}
diff --git a/net/tipc/net.h b/net/tipc/net.h
index c7c254902873..c0306aa2374b 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -47,5 +47,6 @@ void tipc_net_stop(struct net *net);
47 47
48int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); 48int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
49int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); 49int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
50int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
50 51
51#endif 52#endif
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index e48f0b2c01b9..4492cda45566 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -285,10 +285,6 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
285 if (!trans_buf) 285 if (!trans_buf)
286 return -ENOMEM; 286 return -ENOMEM;
287 287
288 err = (*cmd->transcode)(cmd, trans_buf, msg);
289 if (err)
290 goto trans_out;
291
292 attrbuf = kmalloc((tipc_genl_family.maxattr + 1) * 288 attrbuf = kmalloc((tipc_genl_family.maxattr + 1) *
293 sizeof(struct nlattr *), GFP_KERNEL); 289 sizeof(struct nlattr *), GFP_KERNEL);
294 if (!attrbuf) { 290 if (!attrbuf) {
@@ -296,27 +292,34 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
296 goto trans_out; 292 goto trans_out;
297 } 293 }
298 294
299 err = nla_parse(attrbuf, tipc_genl_family.maxattr,
300 (const struct nlattr *)trans_buf->data,
301 trans_buf->len, NULL, NULL);
302 if (err)
303 goto parse_out;
304
305 doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 295 doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
306 if (!doit_buf) { 296 if (!doit_buf) {
307 err = -ENOMEM; 297 err = -ENOMEM;
308 goto parse_out; 298 goto attrbuf_out;
309 } 299 }
310 300
311 doit_buf->sk = msg->dst_sk;
312
313 memset(&info, 0, sizeof(info)); 301 memset(&info, 0, sizeof(info));
314 info.attrs = attrbuf; 302 info.attrs = attrbuf;
315 303
304 rtnl_lock();
305 err = (*cmd->transcode)(cmd, trans_buf, msg);
306 if (err)
307 goto doit_out;
308
309 err = nla_parse(attrbuf, tipc_genl_family.maxattr,
310 (const struct nlattr *)trans_buf->data,
311 trans_buf->len, NULL, NULL);
312 if (err)
313 goto doit_out;
314
315 doit_buf->sk = msg->dst_sk;
316
316 err = (*cmd->doit)(doit_buf, &info); 317 err = (*cmd->doit)(doit_buf, &info);
318doit_out:
319 rtnl_unlock();
317 320
318 kfree_skb(doit_buf); 321 kfree_skb(doit_buf);
319parse_out: 322attrbuf_out:
320 kfree(attrbuf); 323 kfree(attrbuf);
321trans_out: 324trans_out:
322 kfree_skb(trans_buf); 325 kfree_skb(trans_buf);
@@ -722,13 +725,13 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd,
722 725
723 media = tipc_media_find(lc->name); 726 media = tipc_media_find(lc->name);
724 if (media) { 727 if (media) {
725 cmd->doit = &tipc_nl_media_set; 728 cmd->doit = &__tipc_nl_media_set;
726 return tipc_nl_compat_media_set(skb, msg); 729 return tipc_nl_compat_media_set(skb, msg);
727 } 730 }
728 731
729 bearer = tipc_bearer_find(msg->net, lc->name); 732 bearer = tipc_bearer_find(msg->net, lc->name);
730 if (bearer) { 733 if (bearer) {
731 cmd->doit = &tipc_nl_bearer_set; 734 cmd->doit = &__tipc_nl_bearer_set;
732 return tipc_nl_compat_bearer_set(skb, msg); 735 return tipc_nl_compat_bearer_set(skb, msg);
733 } 736 }
734 737
@@ -1089,12 +1092,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg)
1089 return tipc_nl_compat_dumpit(&dump, msg); 1092 return tipc_nl_compat_dumpit(&dump, msg);
1090 case TIPC_CMD_ENABLE_BEARER: 1093 case TIPC_CMD_ENABLE_BEARER:
1091 msg->req_type = TIPC_TLV_BEARER_CONFIG; 1094 msg->req_type = TIPC_TLV_BEARER_CONFIG;
1092 doit.doit = tipc_nl_bearer_enable; 1095 doit.doit = __tipc_nl_bearer_enable;
1093 doit.transcode = tipc_nl_compat_bearer_enable; 1096 doit.transcode = tipc_nl_compat_bearer_enable;
1094 return tipc_nl_compat_doit(&doit, msg); 1097 return tipc_nl_compat_doit(&doit, msg);
1095 case TIPC_CMD_DISABLE_BEARER: 1098 case TIPC_CMD_DISABLE_BEARER:
1096 msg->req_type = TIPC_TLV_BEARER_NAME; 1099 msg->req_type = TIPC_TLV_BEARER_NAME;
1097 doit.doit = tipc_nl_bearer_disable; 1100 doit.doit = __tipc_nl_bearer_disable;
1098 doit.transcode = tipc_nl_compat_bearer_disable; 1101 doit.transcode = tipc_nl_compat_bearer_disable;
1099 return tipc_nl_compat_doit(&doit, msg); 1102 return tipc_nl_compat_doit(&doit, msg);
1100 case TIPC_CMD_SHOW_LINK_STATS: 1103 case TIPC_CMD_SHOW_LINK_STATS:
@@ -1148,12 +1151,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg)
1148 return tipc_nl_compat_dumpit(&dump, msg); 1151 return tipc_nl_compat_dumpit(&dump, msg);
1149 case TIPC_CMD_SET_NODE_ADDR: 1152 case TIPC_CMD_SET_NODE_ADDR:
1150 msg->req_type = TIPC_TLV_NET_ADDR; 1153 msg->req_type = TIPC_TLV_NET_ADDR;
1151 doit.doit = tipc_nl_net_set; 1154 doit.doit = __tipc_nl_net_set;
1152 doit.transcode = tipc_nl_compat_net_set; 1155 doit.transcode = tipc_nl_compat_net_set;
1153 return tipc_nl_compat_doit(&doit, msg); 1156 return tipc_nl_compat_doit(&doit, msg);
1154 case TIPC_CMD_SET_NETID: 1157 case TIPC_CMD_SET_NETID:
1155 msg->req_type = TIPC_TLV_UNSIGNED; 1158 msg->req_type = TIPC_TLV_UNSIGNED;
1156 doit.doit = tipc_nl_net_set; 1159 doit.doit = __tipc_nl_net_set;
1157 doit.transcode = tipc_nl_compat_net_set; 1160 doit.transcode = tipc_nl_compat_net_set;
1158 return tipc_nl_compat_doit(&doit, msg); 1161 return tipc_nl_compat_doit(&doit, msg);
1159 case TIPC_CMD_GET_NETID: 1162 case TIPC_CMD_GET_NETID:
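
Note why the reordering above matters: __tipc_nl_compat_doit() now takes rtnl_lock once around both the transcode callback and the doit handler. Since rtnl_lock is not recursive, the handlers reached through cmd->doit must be the lock-free __ cores; pointing the table at the public wrappers, which take the lock themselves, would self-deadlock. Hence the __tipc_nl_media_set / __tipc_nl_bearer_* / __tipc_nl_net_set substitutions in the hunks above. Condensed from the first hunk, with the error-path labels omitted:

	rtnl_lock();
	err = (*cmd->transcode)(cmd, trans_buf, msg);
	if (!err)
		err = (*cmd->doit)(doit_buf, &info); /* an unlocked __ core */
	rtnl_unlock();
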
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9036d8756e73..389193d7cf67 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1618,6 +1618,30 @@ discard:
1618 kfree_skb(skb); 1618 kfree_skb(skb);
1619} 1619}
1620 1620
1621void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b)
1622{
1623 struct tipc_net *tn = tipc_net(net);
1624 int bearer_id = b->identity;
1625 struct sk_buff_head xmitq;
1626 struct tipc_link_entry *e;
1627 struct tipc_node *n;
1628
1629 __skb_queue_head_init(&xmitq);
1630
1631 rcu_read_lock();
1632
1633 list_for_each_entry_rcu(n, &tn->node_list, list) {
1634 tipc_node_write_lock(n);
1635 e = &n->links[bearer_id];
1636 if (e->link)
1637 tipc_link_set_tolerance(e->link, b->tolerance, &xmitq);
1638 tipc_node_write_unlock(n);
1639 tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr);
1640 }
1641
1642 rcu_read_unlock();
1643}
1644
1621int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) 1645int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
1622{ 1646{
1623 struct net *net = sock_net(skb->sk); 1647 struct net *net = sock_net(skb->sk);
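
The new tipc_node_apply_tolerance() closes the gap between a bearer-level tolerance change and the links that inherit it: __tipc_nl_bearer_set() stores the new value on the bearer, then this helper walks the node list under RCU and, holding each node's write lock, pushes the value into that node's link for the bearer, collecting protocol messages in xmitq for transmission. Together with the link.c hunk earlier, which only builds a STATE_MSG when the link is up, the call site added in bearer.c reduces to:

	if (props[TIPC_NLA_PROP_TOL]) {
		b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
		/* propagate to every link currently using this bearer */
		tipc_node_apply_tolerance(net, b);
	}
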
diff --git a/net/tipc/node.h b/net/tipc/node.h
index acd58d23a70e..4ce5e3a185c0 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -65,6 +65,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
65 struct tipc_media_addr *maddr, 65 struct tipc_media_addr *maddr,
66 bool *respond, bool *dupl_addr); 66 bool *respond, bool *dupl_addr);
67void tipc_node_delete_links(struct net *net, int bearer_id); 67void tipc_node_delete_links(struct net *net, int bearer_id);
68void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b);
68int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, 69int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
69 char *linkname, size_t len); 70 char *linkname, size_t len);
70int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, 71int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
diff --git a/net/tipc/server.c b/net/tipc/server.c
deleted file mode 100644
index df0c563c90cd..000000000000
--- a/net/tipc/server.c
+++ /dev/null
@@ -1,710 +0,0 @@
1/*
2 * net/tipc/server.c: TIPC server infrastructure
3 *
4 * Copyright (c) 2012-2013, Wind River Systems
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#include "server.h"
37#include "core.h"
38#include "socket.h"
39#include "addr.h"
40#include "msg.h"
41#include <net/sock.h>
42#include <linux/module.h>
43
44/* Number of messages to send before rescheduling */
45#define MAX_SEND_MSG_COUNT 25
46#define MAX_RECV_MSG_COUNT 25
47#define CF_CONNECTED 1
48#define CF_SERVER 2
49
50#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data)
51
52/**
53 * struct tipc_conn - TIPC connection structure
54 * @kref: reference counter to connection object
55 * @conid: connection identifier
56 * @sock: socket handler associated with connection
57 * @flags: indicates connection state
58 * @server: pointer to connected server
59 * @rwork: receive work item
60 * @usr_data: user-specified field
61 * @rx_action: what to do when connection socket is active
62 * @outqueue: pointer to first outbound message in queue
63 * @outqueue_lock: control access to the outqueue
64 * @outqueue: list of connection objects for its server
65 * @swork: send work item
66 */
67struct tipc_conn {
68 struct kref kref;
69 int conid;
70 struct socket *sock;
71 unsigned long flags;
72 struct tipc_server *server;
73 struct work_struct rwork;
74 int (*rx_action) (struct tipc_conn *con);
75 void *usr_data;
76 struct list_head outqueue;
77 spinlock_t outqueue_lock;
78 struct work_struct swork;
79};
80
81/* An entry waiting to be sent */
82struct outqueue_entry {
83 struct list_head list;
84 struct kvec iov;
85 struct sockaddr_tipc dest;
86};
87
88static void tipc_recv_work(struct work_struct *work);
89static void tipc_send_work(struct work_struct *work);
90static void tipc_clean_outqueues(struct tipc_conn *con);
91
92static void tipc_conn_kref_release(struct kref *kref)
93{
94 struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
95 struct tipc_server *s = con->server;
96 struct sockaddr_tipc *saddr = s->saddr;
97 struct socket *sock = con->sock;
98 struct sock *sk;
99
100 if (sock) {
101 sk = sock->sk;
102 if (test_bit(CF_SERVER, &con->flags)) {
103 __module_get(sock->ops->owner);
104 __module_get(sk->sk_prot_creator->owner);
105 }
106 saddr->scope = -TIPC_NODE_SCOPE;
107 kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
108 sock_release(sock);
109 con->sock = NULL;
110 }
111 spin_lock_bh(&s->idr_lock);
112 idr_remove(&s->conn_idr, con->conid);
113 s->idr_in_use--;
114 spin_unlock_bh(&s->idr_lock);
115 tipc_clean_outqueues(con);
116 kfree(con);
117}
118
119static void conn_put(struct tipc_conn *con)
120{
121 kref_put(&con->kref, tipc_conn_kref_release);
122}
123
124static void conn_get(struct tipc_conn *con)
125{
126 kref_get(&con->kref);
127}
128
129static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
130{
131 struct tipc_conn *con;
132
133 spin_lock_bh(&s->idr_lock);
134 con = idr_find(&s->conn_idr, conid);
135 if (con) {
136 if (!test_bit(CF_CONNECTED, &con->flags) ||
137 !kref_get_unless_zero(&con->kref))
138 con = NULL;
139 }
140 spin_unlock_bh(&s->idr_lock);
141 return con;
142}
143
144static void sock_data_ready(struct sock *sk)
145{
146 struct tipc_conn *con;
147
148 read_lock_bh(&sk->sk_callback_lock);
149 con = sock2con(sk);
150 if (con && test_bit(CF_CONNECTED, &con->flags)) {
151 conn_get(con);
152 if (!queue_work(con->server->rcv_wq, &con->rwork))
153 conn_put(con);
154 }
155 read_unlock_bh(&sk->sk_callback_lock);
156}
157
158static void sock_write_space(struct sock *sk)
159{
160 struct tipc_conn *con;
161
162 read_lock_bh(&sk->sk_callback_lock);
163 con = sock2con(sk);
164 if (con && test_bit(CF_CONNECTED, &con->flags)) {
165 conn_get(con);
166 if (!queue_work(con->server->send_wq, &con->swork))
167 conn_put(con);
168 }
169 read_unlock_bh(&sk->sk_callback_lock);
170}
171
172static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
173{
174 struct sock *sk = sock->sk;
175
176 write_lock_bh(&sk->sk_callback_lock);
177
178 sk->sk_data_ready = sock_data_ready;
179 sk->sk_write_space = sock_write_space;
180 sk->sk_user_data = con;
181
182 con->sock = sock;
183
184 write_unlock_bh(&sk->sk_callback_lock);
185}
186
187static void tipc_close_conn(struct tipc_conn *con)
188{
189 struct tipc_server *s = con->server;
190 struct sock *sk = con->sock->sk;
191 bool disconnect = false;
192
193 write_lock_bh(&sk->sk_callback_lock);
194 disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
195 if (disconnect) {
196 sk->sk_user_data = NULL;
197 if (con->conid)
198 s->tipc_conn_release(con->conid, con->usr_data);
199 }
200 write_unlock_bh(&sk->sk_callback_lock);
201
202 /* Handle concurrent calls from sending and receiving threads */
203 if (!disconnect)
204 return;
205
206 /* Don't flush pending works, -just let them expire */
207 kernel_sock_shutdown(con->sock, SHUT_RDWR);
208 conn_put(con);
209}
210
211static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
212{
213 struct tipc_conn *con;
214 int ret;
215
216 con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC);
217 if (!con)
218 return ERR_PTR(-ENOMEM);
219
220 kref_init(&con->kref);
221 INIT_LIST_HEAD(&con->outqueue);
222 spin_lock_init(&con->outqueue_lock);
223 INIT_WORK(&con->swork, tipc_send_work);
224 INIT_WORK(&con->rwork, tipc_recv_work);
225
226 spin_lock_bh(&s->idr_lock);
227 ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
228 if (ret < 0) {
229 kfree(con);
230 spin_unlock_bh(&s->idr_lock);
231 return ERR_PTR(-ENOMEM);
232 }
233 con->conid = ret;
234 s->idr_in_use++;
235 spin_unlock_bh(&s->idr_lock);
236
237 set_bit(CF_CONNECTED, &con->flags);
238 con->server = s;
239
240 return con;
241}
242
243static int tipc_receive_from_sock(struct tipc_conn *con)
244{
245 struct tipc_server *s = con->server;
246 struct sock *sk = con->sock->sk;
247 struct sockaddr_tipc addr;
248 struct msghdr msg = {};
249 struct kvec iov;
250 void *buf;
251 int ret;
252
253 buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC);
254 if (!buf) {
255 ret = -ENOMEM;
256 goto out_close;
257 }
258
259 iov.iov_base = buf;
260 iov.iov_len = s->max_rcvbuf_size;
261 msg.msg_name = &addr;
262 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
263 ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
264 if (ret <= 0) {
265 kmem_cache_free(s->rcvbuf_cache, buf);
266 goto out_close;
267 }
268
269 read_lock_bh(&sk->sk_callback_lock);
270 if (test_bit(CF_CONNECTED, &con->flags))
271 ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid,
272 &addr, con->usr_data, buf, ret);
273 read_unlock_bh(&sk->sk_callback_lock);
274 kmem_cache_free(s->rcvbuf_cache, buf);
275 if (ret < 0)
276 tipc_conn_terminate(s, con->conid);
277 return ret;
278
279out_close:
280 if (ret != -EWOULDBLOCK)
281 tipc_close_conn(con);
282 else if (ret == 0)
283 /* Don't return success if we really got EOF */
284 ret = -EAGAIN;
285
286 return ret;
287}
288
289static int tipc_accept_from_sock(struct tipc_conn *con)
290{
291 struct tipc_server *s = con->server;
292 struct socket *sock = con->sock;
293 struct socket *newsock;
294 struct tipc_conn *newcon;
295 int ret;
296
297 ret = kernel_accept(sock, &newsock, O_NONBLOCK);
298 if (ret < 0)
299 return ret;
300
301 newcon = tipc_alloc_conn(con->server);
302 if (IS_ERR(newcon)) {
303 ret = PTR_ERR(newcon);
304 sock_release(newsock);
305 return ret;
306 }
307
308 newcon->rx_action = tipc_receive_from_sock;
309 tipc_register_callbacks(newsock, newcon);
310
311 /* Notify that new connection is incoming */
312 newcon->usr_data = s->tipc_conn_new(newcon->conid);
313 if (!newcon->usr_data) {
314 sock_release(newsock);
315 conn_put(newcon);
316 return -ENOMEM;
317 }
318
319 /* Wake up receive process in case of 'SYN+' message */
320 newsock->sk->sk_data_ready(newsock->sk);
321 return ret;
322}
323
324static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
325{
326 struct tipc_server *s = con->server;
327 struct socket *sock = NULL;
328 int ret;
329
330 ret = sock_create_kern(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock);
331 if (ret < 0)
332 return NULL;
333 ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
334 (char *)&s->imp, sizeof(s->imp));
335 if (ret < 0)
336 goto create_err;
337 ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr));
338 if (ret < 0)
339 goto create_err;
340
341 switch (s->type) {
342 case SOCK_STREAM:
343 case SOCK_SEQPACKET:
344 con->rx_action = tipc_accept_from_sock;
345
346 ret = kernel_listen(sock, 0);
347 if (ret < 0)
348 goto create_err;
349 break;
350 case SOCK_DGRAM:
351 case SOCK_RDM:
352 con->rx_action = tipc_receive_from_sock;
353 break;
354 default:
355 pr_err("Unknown socket type %d\n", s->type);
356 goto create_err;
357 }
358
359 /* As server's listening socket owner and creator is the same module,
360 * we have to decrease TIPC module reference count to guarantee that
361 * it remains zero after the server socket is created, otherwise,
362 * executing "rmmod" command is unable to make TIPC module deleted
363 * after TIPC module is inserted successfully.
364 *
365 * However, the reference count is ever increased twice in
366 * sock_create_kern(): one is to increase the reference count of owner
367 * of TIPC socket's proto_ops struct; another is to increment the
368 * reference count of owner of TIPC proto struct. Therefore, we must
369 * decrement the module reference count twice to ensure that it keeps
370 * zero after server's listening socket is created. Of course, we
371 * must bump the module reference count twice as well before the socket
372 * is closed.
373 */
374 module_put(sock->ops->owner);
375 module_put(sock->sk->sk_prot_creator->owner);
376 set_bit(CF_SERVER, &con->flags);
377
378 return sock;
379
380create_err:
381 kernel_sock_shutdown(sock, SHUT_RDWR);
382 sock_release(sock);
383 return NULL;
384}
385
386static int tipc_open_listening_sock(struct tipc_server *s)
387{
388 struct socket *sock;
389 struct tipc_conn *con;
390
391 con = tipc_alloc_conn(s);
392 if (IS_ERR(con))
393 return PTR_ERR(con);
394
395 sock = tipc_create_listen_sock(con);
396 if (!sock) {
397 idr_remove(&s->conn_idr, con->conid);
398 s->idr_in_use--;
399 kfree(con);
400 return -EINVAL;
401 }
402
403 tipc_register_callbacks(sock, con);
404 return 0;
405}
406
407static struct outqueue_entry *tipc_alloc_entry(void *data, int len)
408{
409 struct outqueue_entry *entry;
410 void *buf;
411
412 entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC);
413 if (!entry)
414 return NULL;
415
416 buf = kmemdup(data, len, GFP_ATOMIC);
417 if (!buf) {
418 kfree(entry);
419 return NULL;
420 }
421
422 entry->iov.iov_base = buf;
423 entry->iov.iov_len = len;
424
425 return entry;
426}
427
428static void tipc_free_entry(struct outqueue_entry *e)
429{
430 kfree(e->iov.iov_base);
431 kfree(e);
432}
433
434static void tipc_clean_outqueues(struct tipc_conn *con)
435{
436 struct outqueue_entry *e, *safe;
437
438 spin_lock_bh(&con->outqueue_lock);
439 list_for_each_entry_safe(e, safe, &con->outqueue, list) {
440 list_del(&e->list);
441 tipc_free_entry(e);
442 }
443 spin_unlock_bh(&con->outqueue_lock);
444}
445
446int tipc_conn_sendmsg(struct tipc_server *s, int conid,
447 struct sockaddr_tipc *addr, void *data, size_t len)
448{
449 struct outqueue_entry *e;
450 struct tipc_conn *con;
451
452 con = tipc_conn_lookup(s, conid);
453 if (!con)
454 return -EINVAL;
455
456 if (!test_bit(CF_CONNECTED, &con->flags)) {
457 conn_put(con);
458 return 0;
459 }
460
461 e = tipc_alloc_entry(data, len);
462 if (!e) {
463 conn_put(con);
464 return -ENOMEM;
465 }
466
467 if (addr)
468 memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc));
469
470 spin_lock_bh(&con->outqueue_lock);
471 list_add_tail(&e->list, &con->outqueue);
472 spin_unlock_bh(&con->outqueue_lock);
473
474 if (!queue_work(s->send_wq, &con->swork))
475 conn_put(con);
476 return 0;
477}
478
479void tipc_conn_terminate(struct tipc_server *s, int conid)
480{
481 struct tipc_conn *con;
482
483 con = tipc_conn_lookup(s, conid);
484 if (con) {
485 tipc_close_conn(con);
486 conn_put(con);
487 }
488}
489
490bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
491 u32 upper, u32 filter, int *conid)
492{
493 struct tipc_subscriber *scbr;
494 struct tipc_subscr sub;
495 struct tipc_server *s;
496 struct tipc_conn *con;
497
498 sub.seq.type = type;
499 sub.seq.lower = lower;
500 sub.seq.upper = upper;
501 sub.timeout = TIPC_WAIT_FOREVER;
502 sub.filter = filter;
503 *(u32 *)&sub.usr_handle = port;
504
505 con = tipc_alloc_conn(tipc_topsrv(net));
506 if (IS_ERR(con))
507 return false;
508
509 *conid = con->conid;
510 s = con->server;
511 scbr = s->tipc_conn_new(*conid);
512 if (!scbr) {
513 conn_put(con);
514 return false;
515 }
516
517 con->usr_data = scbr;
518 con->sock = NULL;
519 s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub));
520 return true;
521}
522
523void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
524{
525 struct tipc_conn *con;
526 struct tipc_server *srv;
527
528 con = tipc_conn_lookup(tipc_topsrv(net), conid);
529 if (!con)
530 return;
531
532 test_and_clear_bit(CF_CONNECTED, &con->flags);
533 srv = con->server;
534 if (con->conid)
535 srv->tipc_conn_release(con->conid, con->usr_data);
536 conn_put(con);
537 conn_put(con);
538}
539
540static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
541{
542 u32 port = *(u32 *)&evt->s.usr_handle;
543 u32 self = tipc_own_addr(net);
544 struct sk_buff_head evtq;
545 struct sk_buff *skb;
546
547 skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
548 self, self, port, port, 0);
549 if (!skb)
550 return;
551 msg_set_dest_droppable(buf_msg(skb), true);
552 memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
553 skb_queue_head_init(&evtq);
554 __skb_queue_tail(&evtq, skb);
555 tipc_sk_rcv(net, &evtq);
556}
557
558static void tipc_send_to_sock(struct tipc_conn *con)
559{
560 struct tipc_server *s = con->server;
561 struct outqueue_entry *e;
562 struct tipc_event *evt;
563 struct msghdr msg;
564 int count = 0;
565 int ret;
566
567 spin_lock_bh(&con->outqueue_lock);
568 while (test_bit(CF_CONNECTED, &con->flags)) {
569 e = list_entry(con->outqueue.next, struct outqueue_entry, list);
570 if ((struct list_head *) e == &con->outqueue)
571 break;
572
573 spin_unlock_bh(&con->outqueue_lock);
574
575 if (con->sock) {
576 memset(&msg, 0, sizeof(msg));
577 msg.msg_flags = MSG_DONTWAIT;
578 if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
579 msg.msg_name = &e->dest;
580 msg.msg_namelen = sizeof(struct sockaddr_tipc);
581 }
582 ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
583 e->iov.iov_len);
584 if (ret == -EWOULDBLOCK || ret == 0) {
585 cond_resched();
586 goto out;
587 } else if (ret < 0) {
588 goto send_err;
589 }
590 } else {
591 evt = e->iov.iov_base;
592 tipc_send_kern_top_evt(s->net, evt);
593 }
594 /* Don't starve users filling buffers */
595 if (++count >= MAX_SEND_MSG_COUNT) {
596 cond_resched();
597 count = 0;
598 }
599
600 spin_lock_bh(&con->outqueue_lock);
601 list_del(&e->list);
602 tipc_free_entry(e);
603 }
604 spin_unlock_bh(&con->outqueue_lock);
605out:
606 return;
607
608send_err:
609 tipc_close_conn(con);
610}
611
612static void tipc_recv_work(struct work_struct *work)
613{
614 struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
615 int count = 0;
616
617 while (test_bit(CF_CONNECTED, &con->flags)) {
618 if (con->rx_action(con))
619 break;
620
621 /* Don't flood Rx machine */
622 if (++count >= MAX_RECV_MSG_COUNT) {
623 cond_resched();
624 count = 0;
625 }
626 }
627 conn_put(con);
628}
629
630static void tipc_send_work(struct work_struct *work)
631{
632 struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
633
634 if (test_bit(CF_CONNECTED, &con->flags))
635 tipc_send_to_sock(con);
636
637 conn_put(con);
638}
639
640static void tipc_work_stop(struct tipc_server *s)
641{
642 destroy_workqueue(s->rcv_wq);
643 destroy_workqueue(s->send_wq);
644}
645
646static int tipc_work_start(struct tipc_server *s)
647{
648 s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
649 if (!s->rcv_wq) {
650 pr_err("can't start tipc receive workqueue\n");
651 return -ENOMEM;
652 }
653
654 s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
655 if (!s->send_wq) {
656 pr_err("can't start tipc send workqueue\n");
657 destroy_workqueue(s->rcv_wq);
658 return -ENOMEM;
659 }
660
661 return 0;
662}
663
664int tipc_server_start(struct tipc_server *s)
665{
666 int ret;
667
668 spin_lock_init(&s->idr_lock);
669 idr_init(&s->conn_idr);
670 s->idr_in_use = 0;
671
672 s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size,
673 0, SLAB_HWCACHE_ALIGN, NULL);
674 if (!s->rcvbuf_cache)
675 return -ENOMEM;
676
677 ret = tipc_work_start(s);
678 if (ret < 0) {
679 kmem_cache_destroy(s->rcvbuf_cache);
680 return ret;
681 }
682 ret = tipc_open_listening_sock(s);
683 if (ret < 0) {
684 tipc_work_stop(s);
685 kmem_cache_destroy(s->rcvbuf_cache);
686 return ret;
687 }
688 return ret;
689}
690
691void tipc_server_stop(struct tipc_server *s)
692{
693 struct tipc_conn *con;
694 int id;
695
696 spin_lock_bh(&s->idr_lock);
697 for (id = 0; s->idr_in_use; id++) {
698 con = idr_find(&s->conn_idr, id);
699 if (con) {
700 spin_unlock_bh(&s->idr_lock);
701 tipc_close_conn(con);
702 spin_lock_bh(&s->idr_lock);
703 }
704 }
705 spin_unlock_bh(&s->idr_lock);
706
707 tipc_work_stop(s);
708 kmem_cache_destroy(s->rcvbuf_cache);
709 idr_destroy(&s->conn_idr);
710}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b0323ec7971e..f93477187a90 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -665,7 +665,7 @@ exit:
665 * a completely predictable manner). 665 * a completely predictable manner).
666 */ 666 */
667static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, 667static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
668 int *uaddr_len, int peer) 668 int peer)
669{ 669{
670 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; 670 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
671 struct sock *sk = sock->sk; 671 struct sock *sk = sock->sk;
@@ -684,13 +684,12 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
684 addr->addr.id.node = tn->own_addr; 684 addr->addr.id.node = tn->own_addr;
685 } 685 }
686 686
687 *uaddr_len = sizeof(*addr);
688 addr->addrtype = TIPC_ADDR_ID; 687 addr->addrtype = TIPC_ADDR_ID;
689 addr->family = AF_TIPC; 688 addr->family = AF_TIPC;
690 addr->scope = 0; 689 addr->scope = 0;
691 addr->addr.name.domain = 0; 690 addr->addr.name.domain = 0;
692 691
693 return 0; 692 return sizeof(*addr);
694} 693}
695 694
696/** 695/**
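
This is the proto_ops side of the getname conversion seen at the top of this patch in net/socket.c: instead of writing the length through *uaddr_len, the handler returns it, and callers treat any non-negative return as the valid length. A minimal sketch of a getname implementation under the new signature — the address family, type, and names here are hypothetical:

	static int example_getname(struct socket *sock, struct sockaddr *uaddr,
				   int peer)
	{
		struct sockaddr_example *addr = (struct sockaddr_example *)uaddr;

		if (peer && sock->sk->sk_state != TCP_ESTABLISHED)
			return -ENOTCONN;	/* no connected peer to report */

		memset(addr, 0, sizeof(*addr));
		addr->family = AF_EXAMPLE;	/* hypothetical family */
		/* ... fill in the rest of the address ... */

		return sizeof(*addr);		/* replaces *uaddr_len = ... */
	}
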
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 68e26470c516..6925a989569b 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * net/tipc/subscr.c: TIPC network topology service 2 * net/tipc/subscr.c: TIPC network topology service
3 * 3 *
4 * Copyright (c) 2000-2006, Ericsson AB 4 * Copyright (c) 2000-2017, Ericsson AB
5 * Copyright (c) 2005-2007, 2010-2013, Wind River Systems 5 * Copyright (c) 2005-2007, 2010-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
@@ -38,61 +38,30 @@
38#include "name_table.h" 38#include "name_table.h"
39#include "subscr.h" 39#include "subscr.h"
40 40
41/** 41static void tipc_sub_send_event(struct tipc_subscription *sub,
42 * struct tipc_subscriber - TIPC network topology subscriber 42 u32 found_lower, u32 found_upper,
43 * @kref: reference counter to tipc_subscription object 43 u32 event, u32 port, u32 node)
44 * @conid: connection identifier to server connecting to subscriber
45 * @lock: control access to subscriber
46 * @subscrp_list: list of subscription objects for this subscriber
47 */
48struct tipc_subscriber {
49 struct kref kref;
50 int conid;
51 spinlock_t lock;
52 struct list_head subscrp_list;
53};
54
55static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
56
57/**
58 * htohl - convert value to endianness used by destination
59 * @in: value to convert
60 * @swap: non-zero if endianness must be reversed
61 *
62 * Returns converted value
63 */
64static u32 htohl(u32 in, int swap)
65{
66 return swap ? swab32(in) : in;
67}
68
69static void tipc_subscrp_send_event(struct tipc_subscription *sub,
70 u32 found_lower, u32 found_upper,
71 u32 event, u32 port_ref, u32 node)
72{ 44{
73 struct tipc_net *tn = net_generic(sub->net, tipc_net_id); 45 struct tipc_event *evt = &sub->evt;
74 struct tipc_subscriber *subscriber = sub->subscriber;
75 struct kvec msg_sect;
76 46
77 msg_sect.iov_base = (void *)&sub->evt; 47 if (sub->inactive)
78 msg_sect.iov_len = sizeof(struct tipc_event); 48 return;
79 sub->evt.event = htohl(event, sub->swap); 49 tipc_evt_write(evt, event, event);
80 sub->evt.found_lower = htohl(found_lower, sub->swap); 50 tipc_evt_write(evt, found_lower, found_lower);
81 sub->evt.found_upper = htohl(found_upper, sub->swap); 51 tipc_evt_write(evt, found_upper, found_upper);
82 sub->evt.port.ref = htohl(port_ref, sub->swap); 52 tipc_evt_write(evt, port.ref, port);
83 sub->evt.port.node = htohl(node, sub->swap); 53 tipc_evt_write(evt, port.node, node);
84 tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL, 54 tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt);
85 msg_sect.iov_base, msg_sect.iov_len);
86} 55}
87 56
88/** 57/**
89 * tipc_subscrp_check_overlap - test for subscription overlap with the 58 * tipc_sub_check_overlap - test for subscription overlap with the
90 * given values 59 * given values
91 * 60 *
92 * Returns 1 if there is overlap, otherwise 0. 61 * Returns 1 if there is overlap, otherwise 0.
93 */ 62 */
94int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, 63int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
95 u32 found_upper) 64 u32 found_upper)
96{ 65{
97 if (found_lower < seq->lower) 66 if (found_lower < seq->lower)
98 found_lower = seq->lower; 67 found_lower = seq->lower;
@@ -103,298 +72,98 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
103 return 1; 72 return 1;
104} 73}
105 74
106u32 tipc_subscrp_convert_seq_type(u32 type, int swap) 75void tipc_sub_report_overlap(struct tipc_subscription *sub,
107{ 76 u32 found_lower, u32 found_upper,
108 return htohl(type, swap); 77 u32 event, u32 port, u32 node,
109} 78 u32 scope, int must)
110
111void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
112 struct tipc_name_seq *out)
113{
114 out->type = htohl(in->type, swap);
115 out->lower = htohl(in->lower, swap);
116 out->upper = htohl(in->upper, swap);
117}
118
119void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
120 u32 found_upper, u32 event, u32 port_ref,
121 u32 node, u32 scope, int must)
122{ 79{
123 u32 filter = htohl(sub->evt.s.filter, sub->swap); 80 struct tipc_subscr *s = &sub->evt.s;
81 u32 filter = tipc_sub_read(s, filter);
124 struct tipc_name_seq seq; 82 struct tipc_name_seq seq;
125 83
126 tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); 84 seq.type = tipc_sub_read(s, seq.type);
127 if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) 85 seq.lower = tipc_sub_read(s, seq.lower);
86 seq.upper = tipc_sub_read(s, seq.upper);
87
88 if (!tipc_sub_check_overlap(&seq, found_lower, found_upper))
128 return; 89 return;
90
129 if (!must && !(filter & TIPC_SUB_PORTS)) 91 if (!must && !(filter & TIPC_SUB_PORTS))
130 return; 92 return;
131 if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE) 93 if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE)
132 return; 94 return;
133 if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE) 95 if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
134 return; 96 return;
135 97 spin_lock(&sub->lock);
136 tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, 98 tipc_sub_send_event(sub, found_lower, found_upper,
137 node); 99 event, port, node);
100 spin_unlock(&sub->lock);
138} 101}
139 102
140static void tipc_subscrp_timeout(struct timer_list *t) 103static void tipc_sub_timeout(struct timer_list *t)
141{ 104{
142 struct tipc_subscription *sub = from_timer(sub, t, timer); 105 struct tipc_subscription *sub = from_timer(sub, t, timer);
143 struct tipc_subscriber *subscriber = sub->subscriber; 106 struct tipc_subscr *s = &sub->evt.s;
144
145 spin_lock_bh(&subscriber->lock);
146 tipc_nametbl_unsubscribe(sub);
147 list_del(&sub->subscrp_list);
148 spin_unlock_bh(&subscriber->lock);
149
150 /* Notify subscriber of timeout */
151 tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
152 TIPC_SUBSCR_TIMEOUT, 0, 0);
153
154 tipc_subscrp_put(sub);
155}
156
157static void tipc_subscrb_kref_release(struct kref *kref)
158{
159 kfree(container_of(kref,struct tipc_subscriber, kref));
160}
161
162static void tipc_subscrb_put(struct tipc_subscriber *subscriber)
163{
164 kref_put(&subscriber->kref, tipc_subscrb_kref_release);
165}
166 107
167static void tipc_subscrb_get(struct tipc_subscriber *subscriber) 108 spin_lock(&sub->lock);
168{ 109 tipc_sub_send_event(sub, s->seq.lower, s->seq.upper,
169 kref_get(&subscriber->kref); 110 TIPC_SUBSCR_TIMEOUT, 0, 0);
111 sub->inactive = true;
112 spin_unlock(&sub->lock);
170} 113}
171 114
172static void tipc_subscrp_kref_release(struct kref *kref) 115static void tipc_sub_kref_release(struct kref *kref)
173{ 116{
174 struct tipc_subscription *sub = container_of(kref, 117 kfree(container_of(kref, struct tipc_subscription, kref));
175 struct tipc_subscription,
176 kref);
177 struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
178 struct tipc_subscriber *subscriber = sub->subscriber;
179
180 atomic_dec(&tn->subscription_count);
181 kfree(sub);
182 tipc_subscrb_put(subscriber);
183} 118}
184 119
185void tipc_subscrp_put(struct tipc_subscription *subscription) 120void tipc_sub_put(struct tipc_subscription *subscription)
186{ 121{
187 kref_put(&subscription->kref, tipc_subscrp_kref_release); 122 kref_put(&subscription->kref, tipc_sub_kref_release);
188} 123}
189 124
190void tipc_subscrp_get(struct tipc_subscription *subscription) 125void tipc_sub_get(struct tipc_subscription *subscription)
191{ 126{
192 kref_get(&subscription->kref); 127 kref_get(&subscription->kref);
193} 128}
194 129
195/* tipc_subscrb_subscrp_delete - delete a specific subscription or all 130struct tipc_subscription *tipc_sub_subscribe(struct net *net,
196 * subscriptions for a given subscriber. 131 struct tipc_subscr *s,
197 */ 132 int conid)
198static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
199 struct tipc_subscr *s)
200{
201 struct list_head *subscription_list = &subscriber->subscrp_list;
202 struct tipc_subscription *sub, *temp;
203 u32 timeout;
204
205 spin_lock_bh(&subscriber->lock);
206 list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) {
207 if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
208 continue;
209
210 timeout = htohl(sub->evt.s.timeout, sub->swap);
211 if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) {
212 tipc_nametbl_unsubscribe(sub);
213 list_del(&sub->subscrp_list);
214 tipc_subscrp_put(sub);
215 }
216
217 if (s)
218 break;
219 }
220 spin_unlock_bh(&subscriber->lock);
221}
222
223static struct tipc_subscriber *tipc_subscrb_create(int conid)
224{
225 struct tipc_subscriber *subscriber;
226
227 subscriber = kzalloc(sizeof(*subscriber), GFP_ATOMIC);
228 if (!subscriber) {
229 pr_warn("Subscriber rejected, no memory\n");
230 return NULL;
231 }
232 INIT_LIST_HEAD(&subscriber->subscrp_list);
233 kref_init(&subscriber->kref);
234 subscriber->conid = conid;
235 spin_lock_init(&subscriber->lock);
236
237 return subscriber;
238}
239
240static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
241{
242 tipc_subscrb_subscrp_delete(subscriber, NULL);
243 tipc_subscrb_put(subscriber);
244}
245
246static void tipc_subscrp_cancel(struct tipc_subscr *s,
247 struct tipc_subscriber *subscriber)
248{
249 tipc_subscrb_get(subscriber);
250 tipc_subscrb_subscrp_delete(subscriber, s);
251 tipc_subscrb_put(subscriber);
252}
253
254static struct tipc_subscription *tipc_subscrp_create(struct net *net,
255 struct tipc_subscr *s,
256 int swap)
257{ 133{
258 struct tipc_net *tn = net_generic(net, tipc_net_id); 134 u32 filter = tipc_sub_read(s, filter);
259 struct tipc_subscription *sub; 135 struct tipc_subscription *sub;
260 u32 filter = htohl(s->filter, swap); 136 u32 timeout;
261 137
262 /* Refuse subscription if global limit exceeded */ 138 if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) ||
263 if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { 139 (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) {
264 pr_warn("Subscription rejected, limit reached (%u)\n", 140 pr_warn("Subscription rejected, illegal request\n");
265 TIPC_MAX_SUBSCRIPTIONS);
266 return NULL; 141 return NULL;
267 } 142 }
268
269 /* Allocate subscription object */
270 sub = kmalloc(sizeof(*sub), GFP_ATOMIC); 143 sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
271 if (!sub) { 144 if (!sub) {
272 pr_warn("Subscription rejected, no memory\n"); 145 pr_warn("Subscription rejected, no memory\n");
273 return NULL; 146 return NULL;
274 } 147 }
275
276 /* Initialize subscription object */
277 sub->net = net; 148 sub->net = net;
278 if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) || 149 sub->conid = conid;
279 (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) { 150 sub->inactive = false;
280 pr_warn("Subscription rejected, illegal request\n");
281 kfree(sub);
282 return NULL;
283 }
284
285 sub->swap = swap;
286 memcpy(&sub->evt.s, s, sizeof(*s)); 151 memcpy(&sub->evt.s, s, sizeof(*s));
287 atomic_inc(&tn->subscription_count); 152 spin_lock_init(&sub->lock);
288 kref_init(&sub->kref); 153 kref_init(&sub->kref);
289 return sub; 154 tipc_nametbl_subscribe(sub);
290} 155 timer_setup(&sub->timer, tipc_sub_timeout, 0);
291 156 timeout = tipc_sub_read(&sub->evt.s, timeout);
292static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
293 struct tipc_subscriber *subscriber, int swap,
294 bool status)
295{
296 struct tipc_subscription *sub = NULL;
297 u32 timeout;
298
299 sub = tipc_subscrp_create(net, s, swap);
300 if (!sub)
301 return -1;
302
303 spin_lock_bh(&subscriber->lock);
304 list_add(&sub->subscrp_list, &subscriber->subscrp_list);
305 sub->subscriber = subscriber;
306 tipc_nametbl_subscribe(sub, status);
307 tipc_subscrb_get(subscriber);
308 spin_unlock_bh(&subscriber->lock);
309
310 timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
311 timeout = htohl(sub->evt.s.timeout, swap);
312
313 if (timeout != TIPC_WAIT_FOREVER) 157 if (timeout != TIPC_WAIT_FOREVER)
314 mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout)); 158 mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
315 return 0; 159 return sub;
316}
317
318/* Handle one termination request for the subscriber */
319static void tipc_subscrb_release_cb(int conid, void *usr_data)
320{
321 tipc_subscrb_delete((struct tipc_subscriber *)usr_data);
322}
323
324/* Handle one request to create a new subscription for the subscriber */
325static int tipc_subscrb_rcv_cb(struct net *net, int conid,
326 struct sockaddr_tipc *addr, void *usr_data,
327 void *buf, size_t len)
328{
329 struct tipc_subscriber *subscriber = usr_data;
330 struct tipc_subscr *s = (struct tipc_subscr *)buf;
331 bool status;
332 int swap;
333
334 /* Determine subscriber's endianness */
335 swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE |
336 TIPC_SUB_CANCEL));
337
338 /* Detect & process a subscription cancellation request */
339 if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
340 s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
341 tipc_subscrp_cancel(s, subscriber);
342 return 0;
343 }
344 status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap));
345 return tipc_subscrp_subscribe(net, s, subscriber, swap, status);
346}
347
348/* Handle one request to establish a new subscriber */
349static void *tipc_subscrb_connect_cb(int conid)
350{
351 return (void *)tipc_subscrb_create(conid);
352}
353
354int tipc_topsrv_start(struct net *net)
355{
356 struct tipc_net *tn = net_generic(net, tipc_net_id);
357 const char name[] = "topology_server";
358 struct tipc_server *topsrv;
359 struct sockaddr_tipc *saddr;
360
361 saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC);
362 if (!saddr)
363 return -ENOMEM;
364 saddr->family = AF_TIPC;
365 saddr->addrtype = TIPC_ADDR_NAMESEQ;
366 saddr->addr.nameseq.type = TIPC_TOP_SRV;
367 saddr->addr.nameseq.lower = TIPC_TOP_SRV;
368 saddr->addr.nameseq.upper = TIPC_TOP_SRV;
369 saddr->scope = TIPC_NODE_SCOPE;
370
371 topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC);
372 if (!topsrv) {
373 kfree(saddr);
374 return -ENOMEM;
375 }
376 topsrv->net = net;
377 topsrv->saddr = saddr;
378 topsrv->imp = TIPC_CRITICAL_IMPORTANCE;
379 topsrv->type = SOCK_SEQPACKET;
380 topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr);
381 topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb;
382 topsrv->tipc_conn_new = tipc_subscrb_connect_cb;
383 topsrv->tipc_conn_release = tipc_subscrb_release_cb;
384
385 strncpy(topsrv->name, name, strlen(name) + 1);
386 tn->topsrv = topsrv;
387 atomic_set(&tn->subscription_count, 0);
388
389 return tipc_server_start(topsrv);
390} 160}
391 161
392void tipc_topsrv_stop(struct net *net) 162void tipc_sub_unsubscribe(struct tipc_subscription *sub)
393{ 163{
394 struct tipc_net *tn = net_generic(net, tipc_net_id); 164 tipc_nametbl_unsubscribe(sub);
395 struct tipc_server *topsrv = tn->topsrv; 165 if (sub->evt.s.timeout != TIPC_WAIT_FOREVER)
396 166 del_timer_sync(&sub->timer);
397 tipc_server_stop(topsrv); 167 list_del(&sub->sub_list);
398 kfree(topsrv->saddr); 168 tipc_sub_put(sub);
399 kfree(topsrv);
400} 169}
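
The rewritten subscr.c above serializes event delivery against teardown with a
per-subscription spinlock plus an "inactive" flag: the timeout handler emits its
final TIPC_SUBSCR_TIMEOUT event and sets sub->inactive under sub->lock, and
tipc_sub_send_event() rechecks the flag under the same lock, so no event can ever
follow the timeout event. A minimal userspace sketch of that pattern, with a
pthread mutex standing in for the kernel spinlock (names are illustrative, not
from the patch):

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct sub {
		pthread_mutex_t lock;
		bool inactive;	/* set once the timeout event has gone out */
	};

	/* Event path: bail out under the lock if the subscription is dead */
	static void send_event(struct sub *s, const char *what)
	{
		pthread_mutex_lock(&s->lock);
		if (!s->inactive)
			printf("event: %s\n", what);
		pthread_mutex_unlock(&s->lock);
	}

	/* Timeout path: emit the final event, then mark the subscription
	 * inactive so no later event can slip out after it
	 */
	static void timeout(struct sub *s)
	{
		pthread_mutex_lock(&s->lock);
		printf("event: TIMEOUT\n");
		s->inactive = true;
		pthread_mutex_unlock(&s->lock);
	}

	int main(void)
	{
		struct sub s = { PTHREAD_MUTEX_INITIALIZER, false };

		send_event(&s, "PUBLISHED");	/* delivered */
		timeout(&s);			/* final event */
		send_event(&s, "WITHDRAWN");	/* suppressed */
		return 0;
	}
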
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index f3edca775d9f..8b2d22b18f22 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * net/tipc/subscr.h: Include file for TIPC network topology service 2 * net/tipc/subscr.h: Include file for TIPC network topology service
3 * 3 *
4 * Copyright (c) 2003-2006, Ericsson AB 4 * Copyright (c) 2003-2017, Ericsson AB
5 * Copyright (c) 2005-2007, 2012-2013, Wind River Systems 5 * Copyright (c) 2005-2007, 2012-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
@@ -37,48 +37,72 @@
37#ifndef _TIPC_SUBSCR_H 37#ifndef _TIPC_SUBSCR_H
38#define _TIPC_SUBSCR_H 38#define _TIPC_SUBSCR_H
39 39
40#include "server.h" 40#include "topsrv.h"
41 41
42#define TIPC_MAX_SUBSCRIPTIONS 65535 42#define TIPC_MAX_SUBSCR 65535
43#define TIPC_MAX_PUBLICATIONS 65535 43#define TIPC_MAX_PUBLICATIONS 65535
44 44
45struct tipc_subscription; 45struct tipc_subscription;
46struct tipc_subscriber; 46struct tipc_conn;
47 47
48/** 48/**
49 * struct tipc_subscription - TIPC network topology subscription object 49 * struct tipc_subscription - TIPC network topology subscription object
50 * @subscriber: pointer to its subscriber 50 * @subscriber: pointer to its subscriber
51 * @seq: name sequence associated with subscription 51 * @seq: name sequence associated with subscription
52 * @net: point to network namespace
53 * @timer: timer governing subscription duration (optional) 52 * @timer: timer governing subscription duration (optional)
54 * @nameseq_list: adjacent subscriptions in name sequence's subscription list 53 * @nameseq_list: adjacent subscriptions in name sequence's subscription list
55 * @subscrp_list: adjacent subscriptions in subscriber's subscription list 54 * @sub_list: adjacent subscriptions in subscriber's subscription list
56 * @swap: indicates if subscriber uses opposite endianness in its messages
57 * @evt: template for events generated by subscription 55 * @evt: template for events generated by subscription
58 */ 56 */
59struct tipc_subscription { 57struct tipc_subscription {
60 struct kref kref; 58 struct kref kref;
61 struct tipc_subscriber *subscriber;
62 struct net *net; 59 struct net *net;
63 struct timer_list timer; 60 struct timer_list timer;
64 struct list_head nameseq_list; 61 struct list_head nameseq_list;
65 struct list_head subscrp_list; 62 struct list_head sub_list;
66 int swap;
67 struct tipc_event evt; 63 struct tipc_event evt;
64 int conid;
65 bool inactive;
66 spinlock_t lock; /* serialize up/down and timer events */
68}; 67};
69 68
70int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, 69struct tipc_subscription *tipc_sub_subscribe(struct net *net,
71 u32 found_upper); 70 struct tipc_subscr *s,
72void tipc_subscrp_report_overlap(struct tipc_subscription *sub, 71 int conid);
73 u32 found_lower, u32 found_upper, u32 event, 72void tipc_sub_unsubscribe(struct tipc_subscription *sub);
74 u32 port_ref, u32 node, u32 scope, int must); 73
75void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, 74int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
76 struct tipc_name_seq *out); 75 u32 found_upper);
77u32 tipc_subscrp_convert_seq_type(u32 type, int swap); 76void tipc_sub_report_overlap(struct tipc_subscription *sub,
77 u32 found_lower, u32 found_upper,
78 u32 event, u32 port, u32 node,
79 u32 scope, int must);
78int tipc_topsrv_start(struct net *net); 80int tipc_topsrv_start(struct net *net);
79void tipc_topsrv_stop(struct net *net); 81void tipc_topsrv_stop(struct net *net);
80 82
81void tipc_subscrp_put(struct tipc_subscription *subscription); 83void tipc_sub_put(struct tipc_subscription *subscription);
82void tipc_subscrp_get(struct tipc_subscription *subscription); 84void tipc_sub_get(struct tipc_subscription *subscription);
85
86#define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL)
87
88/* tipc_sub_read - return field_ of struct sub_ in host endian format
89 */
90#define tipc_sub_read(sub_, field_) \
91 ({ \
92 struct tipc_subscr *sub__ = sub_; \
93 u32 val__ = (sub__)->field_; \
94 int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \
95 (swap_ ? swab32(val__) : val__); \
96 })
97
98/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format
99 */
100#define tipc_evt_write(evt_, field_, val_) \
101 ({ \
102 struct tipc_event *evt__ = evt_; \
103 u32 val__ = val_; \
104 int swap_ = !((evt__)->s.filter & (TIPC_FILTER_MASK)); \
105 (evt__)->field_ = swap_ ? swab32(val__) : val__; \
106 })
83 107
84#endif 108#endif
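
The tipc_sub_read()/tipc_evt_write() macros above are what make the removal of
the old per-subscription "swap" field possible: every well-formed request has at
least one of the low TIPC_SUB_* filter bits set, so a filter word with none of
them visible in native byte order must come from an opposite-endian peer. A
standalone sketch of that detection, assuming the usual uapi values for the
filter bits:

	#include <stdint.h>
	#include <stdio.h>

	#define TIPC_SUB_PORTS   0x01	/* assumed from linux/tipc.h */
	#define TIPC_SUB_SERVICE 0x02
	#define TIPC_SUB_CANCEL  0x04
	#define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL)

	static uint32_t swab32(uint32_t x)
	{
		return ((x & 0x000000ffu) << 24) | ((x & 0x0000ff00u) << 8) |
		       ((x & 0x00ff0000u) >> 8)  | ((x & 0xff000000u) >> 24);
	}

	/* Mirrors the tipc_sub_read() idea: if no low filter bit is visible
	 * in native order, the peer uses the opposite endianness
	 */
	static uint32_t sub_read(uint32_t filter, uint32_t field)
	{
		int swap = !(filter & TIPC_FILTER_MASK);

		return swap ? swab32(field) : field;
	}

	int main(void)
	{
		uint32_t same = TIPC_SUB_PORTS;			/* native-endian peer */
		uint32_t other = swab32(TIPC_SUB_PORTS);	/* foreign-endian peer */

		printf("%u\n", sub_read(same, 42));		/* 42 */
		printf("%u\n", sub_read(other, swab32(42)));	/* 42 */
		return 0;
	}
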
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
new file mode 100644
index 000000000000..c8e34ef22c30
--- /dev/null
+++ b/net/tipc/topsrv.c
@@ -0,0 +1,703 @@
1/*
 2 * net/tipc/topsrv.c: TIPC server infrastructure
3 *
4 * Copyright (c) 2012-2013, Wind River Systems
5 * Copyright (c) 2017-2018, Ericsson AB
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the names of the copyright holders nor the names of its
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
19 *
20 * Alternatively, this software may be distributed under the terms of the
21 * GNU General Public License ("GPL") version 2 as published by the Free
22 * Software Foundation.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37#include "subscr.h"
38#include "topsrv.h"
39#include "core.h"
40#include "socket.h"
41#include "addr.h"
42#include "msg.h"
43#include <net/sock.h>
44#include <linux/module.h>
45
 46/* Number of messages to send or receive before rescheduling */
47#define MAX_SEND_MSG_COUNT 25
48#define MAX_RECV_MSG_COUNT 25
49#define CF_CONNECTED 1
50#define CF_SERVER 2
51
52#define TIPC_SERVER_NAME_LEN 32
53
54/**
55 * struct tipc_topsrv - TIPC server structure
56 * @conn_idr: identifier set of connection
57 * @idr_lock: protect the connection identifier set
 58 * @idr_in_use: number of allocated identifier entries
 59 * @net: network namespace instance
 60 * @awork: accept work item
 61 * @rcv_wq: receive workqueue
 62 * @send_wq: send workqueue
 63 * @max_rcvbuf_size: maximum permitted receive message length
 64 * @listener: topology server listening socket
 65 * @name: server name
 66 */
71struct tipc_topsrv {
72 struct idr conn_idr;
73 spinlock_t idr_lock; /* for idr list */
74 int idr_in_use;
75 struct net *net;
76 struct work_struct awork;
77 struct workqueue_struct *rcv_wq;
78 struct workqueue_struct *send_wq;
79 int max_rcvbuf_size;
80 struct socket *listener;
81 char name[TIPC_SERVER_NAME_LEN];
82};
83
84/**
85 * struct tipc_conn - TIPC connection structure
86 * @kref: reference counter to connection object
87 * @conid: connection identifier
88 * @sock: socket handler associated with connection
89 * @flags: indicates connection state
90 * @server: pointer to connected server
 91 * @sub_list: list of all pertaining subscriptions
 92 * @sub_lock: lock protecting the subscription list
 93 * @rwork: receive work item
 94 * @outqueue: pointer to first outbound message in queue
 95 * @outqueue_lock: control access to the outqueue
 96 * @swork: send work item
 97 */
100struct tipc_conn {
101 struct kref kref;
102 int conid;
103 struct socket *sock;
104 unsigned long flags;
105 struct tipc_topsrv *server;
106 struct list_head sub_list;
107 spinlock_t sub_lock; /* for subscription list */
108 struct work_struct rwork;
109 struct list_head outqueue;
110 spinlock_t outqueue_lock; /* for outqueue */
111 struct work_struct swork;
112};
113
114/* An entry waiting to be sent */
115struct outqueue_entry {
116 bool inactive;
117 struct tipc_event evt;
118 struct list_head list;
119};
120
121static void tipc_conn_recv_work(struct work_struct *work);
122static void tipc_conn_send_work(struct work_struct *work);
123static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt);
124static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s);
125
126static bool connected(struct tipc_conn *con)
127{
128 return con && test_bit(CF_CONNECTED, &con->flags);
129}
130
131static void tipc_conn_kref_release(struct kref *kref)
132{
133 struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
134 struct tipc_topsrv *s = con->server;
135 struct outqueue_entry *e, *safe;
136
137 spin_lock_bh(&s->idr_lock);
138 idr_remove(&s->conn_idr, con->conid);
139 s->idr_in_use--;
140 spin_unlock_bh(&s->idr_lock);
141 if (con->sock)
142 sock_release(con->sock);
143
144 spin_lock_bh(&con->outqueue_lock);
145 list_for_each_entry_safe(e, safe, &con->outqueue, list) {
146 list_del(&e->list);
147 kfree(e);
148 }
149 spin_unlock_bh(&con->outqueue_lock);
150 kfree(con);
151}
152
153static void conn_put(struct tipc_conn *con)
154{
155 kref_put(&con->kref, tipc_conn_kref_release);
156}
157
158static void conn_get(struct tipc_conn *con)
159{
160 kref_get(&con->kref);
161}
162
163static void tipc_conn_close(struct tipc_conn *con)
164{
165 struct sock *sk = con->sock->sk;
166 bool disconnect = false;
167
168 write_lock_bh(&sk->sk_callback_lock);
169 disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
170
171 if (disconnect) {
172 sk->sk_user_data = NULL;
173 tipc_conn_delete_sub(con, NULL);
174 }
175 write_unlock_bh(&sk->sk_callback_lock);
176
177 /* Handle concurrent calls from sending and receiving threads */
178 if (!disconnect)
179 return;
180
 181 /* Don't flush pending work items, just let them expire */
182 kernel_sock_shutdown(con->sock, SHUT_RDWR);
183
184 conn_put(con);
185}
186
187static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
188{
189 struct tipc_conn *con;
190 int ret;
191
192 con = kzalloc(sizeof(*con), GFP_ATOMIC);
193 if (!con)
194 return ERR_PTR(-ENOMEM);
195
196 kref_init(&con->kref);
197 INIT_LIST_HEAD(&con->outqueue);
198 INIT_LIST_HEAD(&con->sub_list);
199 spin_lock_init(&con->outqueue_lock);
200 spin_lock_init(&con->sub_lock);
201 INIT_WORK(&con->swork, tipc_conn_send_work);
202 INIT_WORK(&con->rwork, tipc_conn_recv_work);
203
204 spin_lock_bh(&s->idr_lock);
205 ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
206 if (ret < 0) {
207 kfree(con);
208 spin_unlock_bh(&s->idr_lock);
209 return ERR_PTR(-ENOMEM);
210 }
211 con->conid = ret;
212 s->idr_in_use++;
213 spin_unlock_bh(&s->idr_lock);
214
215 set_bit(CF_CONNECTED, &con->flags);
216 con->server = s;
217
218 return con;
219}
220
221static struct tipc_conn *tipc_conn_lookup(struct tipc_topsrv *s, int conid)
222{
223 struct tipc_conn *con;
224
225 spin_lock_bh(&s->idr_lock);
226 con = idr_find(&s->conn_idr, conid);
227 if (!connected(con) || !kref_get_unless_zero(&con->kref))
228 con = NULL;
229 spin_unlock_bh(&s->idr_lock);
230 return con;
231}
232
233/* tipc_conn_delete_sub - delete a specific or all subscriptions
234 * for a given subscriber
235 */
236static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
237{
238 struct tipc_net *tn = tipc_net(con->server->net);
239 struct list_head *sub_list = &con->sub_list;
240 struct tipc_subscription *sub, *tmp;
241
242 spin_lock_bh(&con->sub_lock);
243 list_for_each_entry_safe(sub, tmp, sub_list, sub_list) {
244 if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
245 tipc_sub_unsubscribe(sub);
246 atomic_dec(&tn->subscription_count);
247 } else if (s) {
248 break;
249 }
250 }
251 spin_unlock_bh(&con->sub_lock);
252}
253
254static void tipc_conn_send_to_sock(struct tipc_conn *con)
255{
256 struct list_head *queue = &con->outqueue;
257 struct tipc_topsrv *srv = con->server;
258 struct outqueue_entry *e;
259 struct tipc_event *evt;
260 struct msghdr msg;
261 struct kvec iov;
262 int count = 0;
263 int ret;
264
265 spin_lock_bh(&con->outqueue_lock);
266
267 while (!list_empty(queue)) {
268 e = list_first_entry(queue, struct outqueue_entry, list);
269 evt = &e->evt;
270 spin_unlock_bh(&con->outqueue_lock);
271
272 if (e->inactive)
273 tipc_conn_delete_sub(con, &evt->s);
274
275 memset(&msg, 0, sizeof(msg));
276 msg.msg_flags = MSG_DONTWAIT;
277 iov.iov_base = evt;
278 iov.iov_len = sizeof(*evt);
279 msg.msg_name = NULL;
280
281 if (con->sock) {
282 ret = kernel_sendmsg(con->sock, &msg, &iov,
283 1, sizeof(*evt));
284 if (ret == -EWOULDBLOCK || ret == 0) {
285 cond_resched();
286 return;
287 } else if (ret < 0) {
288 return tipc_conn_close(con);
289 }
290 } else {
291 tipc_topsrv_kern_evt(srv->net, evt);
292 }
293
294 /* Don't starve users filling buffers */
295 if (++count >= MAX_SEND_MSG_COUNT) {
296 cond_resched();
297 count = 0;
298 }
299 spin_lock_bh(&con->outqueue_lock);
300 list_del(&e->list);
301 kfree(e);
302 }
303 spin_unlock_bh(&con->outqueue_lock);
304}
305
306static void tipc_conn_send_work(struct work_struct *work)
307{
308 struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
309
310 if (connected(con))
311 tipc_conn_send_to_sock(con);
312
313 conn_put(con);
314}
315
 316/* tipc_topsrv_queue_evt() - interrupt level call from a subscription instance
 317 * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock()
318 */
319void tipc_topsrv_queue_evt(struct net *net, int conid,
320 u32 event, struct tipc_event *evt)
321{
322 struct tipc_topsrv *srv = tipc_topsrv(net);
323 struct outqueue_entry *e;
324 struct tipc_conn *con;
325
326 con = tipc_conn_lookup(srv, conid);
327 if (!con)
328 return;
329
330 if (!connected(con))
331 goto err;
332
333 e = kmalloc(sizeof(*e), GFP_ATOMIC);
334 if (!e)
335 goto err;
336 e->inactive = (event == TIPC_SUBSCR_TIMEOUT);
337 memcpy(&e->evt, evt, sizeof(*evt));
338 spin_lock_bh(&con->outqueue_lock);
339 list_add_tail(&e->list, &con->outqueue);
340 spin_unlock_bh(&con->outqueue_lock);
341
342 if (queue_work(srv->send_wq, &con->swork))
343 return;
344err:
345 conn_put(con);
346}
347
348/* tipc_conn_write_space - interrupt callback after a sendmsg EAGAIN
349 * Indicates that there now is more space in the send buffer
 350 * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock()
351 */
352static void tipc_conn_write_space(struct sock *sk)
353{
354 struct tipc_conn *con;
355
356 read_lock_bh(&sk->sk_callback_lock);
357 con = sk->sk_user_data;
358 if (connected(con)) {
359 conn_get(con);
360 if (!queue_work(con->server->send_wq, &con->swork))
361 conn_put(con);
362 }
363 read_unlock_bh(&sk->sk_callback_lock);
364}
365
366static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
367 struct tipc_conn *con,
368 struct tipc_subscr *s)
369{
370 struct tipc_net *tn = tipc_net(srv->net);
371 struct tipc_subscription *sub;
372
373 if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
374 tipc_conn_delete_sub(con, s);
375 return 0;
376 }
377 if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) {
378 pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR);
379 return -1;
380 }
381 sub = tipc_sub_subscribe(srv->net, s, con->conid);
382 if (!sub)
383 return -1;
384 atomic_inc(&tn->subscription_count);
385 spin_lock_bh(&con->sub_lock);
386 list_add(&sub->sub_list, &con->sub_list);
387 spin_unlock_bh(&con->sub_lock);
388 return 0;
389}
390
391static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
392{
393 struct tipc_topsrv *srv = con->server;
394 struct sock *sk = con->sock->sk;
395 struct msghdr msg = {};
396 struct tipc_subscr s;
397 struct kvec iov;
398 int ret;
399
400 iov.iov_base = &s;
401 iov.iov_len = sizeof(s);
402 msg.msg_name = NULL;
403 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
404 ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
405 if (ret == -EWOULDBLOCK)
406 return -EWOULDBLOCK;
407 if (ret > 0) {
408 read_lock_bh(&sk->sk_callback_lock);
409 ret = tipc_conn_rcv_sub(srv, con, &s);
410 read_unlock_bh(&sk->sk_callback_lock);
411 }
412 if (ret < 0)
413 tipc_conn_close(con);
414
415 return ret;
416}
417
418static void tipc_conn_recv_work(struct work_struct *work)
419{
420 struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
421 int count = 0;
422
423 while (connected(con)) {
424 if (tipc_conn_rcv_from_sock(con))
425 break;
426
427 /* Don't flood Rx machine */
428 if (++count >= MAX_RECV_MSG_COUNT) {
429 cond_resched();
430 count = 0;
431 }
432 }
433 conn_put(con);
434}
435
436/* tipc_conn_data_ready - interrupt callback indicating the socket has data
 437 * The queued work is launched into tipc_conn_recv_work()->tipc_conn_rcv_from_sock()
438 */
439static void tipc_conn_data_ready(struct sock *sk)
440{
441 struct tipc_conn *con;
442
443 read_lock_bh(&sk->sk_callback_lock);
444 con = sk->sk_user_data;
445 if (connected(con)) {
446 conn_get(con);
447 if (!queue_work(con->server->rcv_wq, &con->rwork))
448 conn_put(con);
449 }
450 read_unlock_bh(&sk->sk_callback_lock);
451}
452
453static void tipc_topsrv_accept(struct work_struct *work)
454{
455 struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork);
456 struct socket *lsock = srv->listener;
457 struct socket *newsock;
458 struct tipc_conn *con;
459 struct sock *newsk;
460 int ret;
461
462 while (1) {
463 ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
464 if (ret < 0)
465 return;
466 con = tipc_conn_alloc(srv);
467 if (IS_ERR(con)) {
468 ret = PTR_ERR(con);
469 sock_release(newsock);
470 return;
471 }
472 /* Register callbacks */
473 newsk = newsock->sk;
474 write_lock_bh(&newsk->sk_callback_lock);
475 newsk->sk_data_ready = tipc_conn_data_ready;
476 newsk->sk_write_space = tipc_conn_write_space;
477 newsk->sk_user_data = con;
478 con->sock = newsock;
479 write_unlock_bh(&newsk->sk_callback_lock);
480
481 /* Wake up receive process in case of 'SYN+' message */
482 newsk->sk_data_ready(newsk);
483 }
484}
485
 486/* tipc_topsrv_listener_data_ready - interrupt callback with connection request
487 * The queued job is launched into tipc_topsrv_accept()
488 */
489static void tipc_topsrv_listener_data_ready(struct sock *sk)
490{
491 struct tipc_topsrv *srv;
492
493 read_lock_bh(&sk->sk_callback_lock);
494 srv = sk->sk_user_data;
495 if (srv->listener)
496 queue_work(srv->rcv_wq, &srv->awork);
497 read_unlock_bh(&sk->sk_callback_lock);
498}
499
500static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
501{
502 int imp = TIPC_CRITICAL_IMPORTANCE;
503 struct socket *lsock = NULL;
504 struct sockaddr_tipc saddr;
505 struct sock *sk;
506 int rc;
507
508 rc = sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock);
509 if (rc < 0)
510 return rc;
511
512 srv->listener = lsock;
513 sk = lsock->sk;
514 write_lock_bh(&sk->sk_callback_lock);
515 sk->sk_data_ready = tipc_topsrv_listener_data_ready;
516 sk->sk_user_data = srv;
517 write_unlock_bh(&sk->sk_callback_lock);
518
519 rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
520 (char *)&imp, sizeof(imp));
521 if (rc < 0)
522 goto err;
523
524 saddr.family = AF_TIPC;
525 saddr.addrtype = TIPC_ADDR_NAMESEQ;
526 saddr.addr.nameseq.type = TIPC_TOP_SRV;
527 saddr.addr.nameseq.lower = TIPC_TOP_SRV;
528 saddr.addr.nameseq.upper = TIPC_TOP_SRV;
529 saddr.scope = TIPC_NODE_SCOPE;
530
531 rc = kernel_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr));
532 if (rc < 0)
533 goto err;
534 rc = kernel_listen(lsock, 0);
535 if (rc < 0)
536 goto err;
537
 538 /* Since the listening socket's owner and creator is the TIPC
 539 * module itself, we have to decrease the module reference count
 540 * here so that it remains zero after the server socket has been
 541 * created; otherwise the "rmmod" command would be unable to
 542 * remove the TIPC module once it has been inserted.
 543 *
 544 * The reference count is incremented twice in sock_create_kern():
 545 * once for the owner of the TIPC socket's proto_ops struct, and
 546 * once for the owner of the TIPC proto struct. Therefore we
 547 * must decrement the module reference count twice to ensure
 548 * that it stays zero after the listening socket is created.
 549 * Of course, the module reference count must also be bumped
 550 * twice again before the socket is finally closed, which is
 551 * done in tipc_topsrv_stop().
 552 */
553 module_put(lsock->ops->owner);
554 module_put(sk->sk_prot_creator->owner);
555
556 return 0;
557err:
558 sock_release(lsock);
559 return -EINVAL;
560}
561
562bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
563 u32 upper, u32 filter, int *conid)
564{
565 struct tipc_subscr sub;
566 struct tipc_conn *con;
567 int rc;
568
569 sub.seq.type = type;
570 sub.seq.lower = lower;
571 sub.seq.upper = upper;
572 sub.timeout = TIPC_WAIT_FOREVER;
573 sub.filter = filter;
574 *(u32 *)&sub.usr_handle = port;
575
576 con = tipc_conn_alloc(tipc_topsrv(net));
577 if (IS_ERR(con))
578 return false;
579
580 *conid = con->conid;
581 con->sock = NULL;
582 rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub);
583 if (rc >= 0)
584 return true;
585 conn_put(con);
586 return false;
587}
588
589void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
590{
591 struct tipc_conn *con;
592
593 con = tipc_conn_lookup(tipc_topsrv(net), conid);
594 if (!con)
595 return;
596
597 test_and_clear_bit(CF_CONNECTED, &con->flags);
598 tipc_conn_delete_sub(con, NULL);
 599 conn_put(con);	/* drop the reference taken by the lookup above */
 600 conn_put(con);	/* drop the kernel connection's own reference */
601}
602
603static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt)
604{
605 u32 port = *(u32 *)&evt->s.usr_handle;
606 u32 self = tipc_own_addr(net);
607 struct sk_buff_head evtq;
608 struct sk_buff *skb;
609
610 skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
611 self, self, port, port, 0);
612 if (!skb)
613 return;
614 msg_set_dest_droppable(buf_msg(skb), true);
615 memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
616 skb_queue_head_init(&evtq);
617 __skb_queue_tail(&evtq, skb);
618 tipc_sk_rcv(net, &evtq);
619}
620
621static int tipc_topsrv_work_start(struct tipc_topsrv *s)
622{
623 s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
624 if (!s->rcv_wq) {
625 pr_err("can't start tipc receive workqueue\n");
626 return -ENOMEM;
627 }
628
629 s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
630 if (!s->send_wq) {
631 pr_err("can't start tipc send workqueue\n");
632 destroy_workqueue(s->rcv_wq);
633 return -ENOMEM;
634 }
635
636 return 0;
637}
638
639static void tipc_topsrv_work_stop(struct tipc_topsrv *s)
640{
641 destroy_workqueue(s->rcv_wq);
642 destroy_workqueue(s->send_wq);
643}
644
645int tipc_topsrv_start(struct net *net)
646{
647 struct tipc_net *tn = tipc_net(net);
648 const char name[] = "topology_server";
649 struct tipc_topsrv *srv;
650 int ret;
651
652 srv = kzalloc(sizeof(*srv), GFP_ATOMIC);
653 if (!srv)
654 return -ENOMEM;
655
656 srv->net = net;
657 srv->max_rcvbuf_size = sizeof(struct tipc_subscr);
658 INIT_WORK(&srv->awork, tipc_topsrv_accept);
659
660 strncpy(srv->name, name, strlen(name) + 1);
661 tn->topsrv = srv;
662 atomic_set(&tn->subscription_count, 0);
663
664 spin_lock_init(&srv->idr_lock);
665 idr_init(&srv->conn_idr);
666 srv->idr_in_use = 0;
667
668 ret = tipc_topsrv_work_start(srv);
669 if (ret < 0)
670 return ret;
671
672 ret = tipc_topsrv_create_listener(srv);
673 if (ret < 0)
674 tipc_topsrv_work_stop(srv);
675
676 return ret;
677}
678
679void tipc_topsrv_stop(struct net *net)
680{
681 struct tipc_topsrv *srv = tipc_topsrv(net);
682 struct socket *lsock = srv->listener;
683 struct tipc_conn *con;
684 int id;
685
686 spin_lock_bh(&srv->idr_lock);
687 for (id = 0; srv->idr_in_use; id++) {
688 con = idr_find(&srv->conn_idr, id);
689 if (con) {
690 spin_unlock_bh(&srv->idr_lock);
691 tipc_conn_close(con);
692 spin_lock_bh(&srv->idr_lock);
693 }
694 }
695 __module_get(lsock->ops->owner);
696 __module_get(lsock->sk->sk_prot_creator->owner);
697 srv->listener = NULL;
698 spin_unlock_bh(&srv->idr_lock);
699 sock_release(lsock);
700 tipc_topsrv_work_stop(srv);
701 idr_destroy(&srv->conn_idr);
702 kfree(srv);
703}
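
tipc_conn_lookup() above relies on the standard kref_get_unless_zero() idiom: a
reference is taken under the idr lock only while the count is still non-zero, so
a connection already on its way into tipc_conn_kref_release() can never be
resurrected by a racing lookup. A rough userspace analogue with C11 atomics (the
fixed-size table and all names here are illustrative assumptions):

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct conn {
		atomic_int refs;	/* stands in for the kref */
		bool connected;		/* stands in for CF_CONNECTED */
	};

	static pthread_mutex_t tbl_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct conn *table[16];	/* stand-in for the conn_idr */

	/* Take a reference only if the object is still alive */
	static bool get_unless_zero(struct conn *c)
	{
		int old = atomic_load(&c->refs);

		while (old > 0)
			if (atomic_compare_exchange_weak(&c->refs, &old, old + 1))
				return true;
		return false;
	}

	static struct conn *conn_lookup(int conid)
	{
		struct conn *c;

		pthread_mutex_lock(&tbl_lock);
		c = table[conid];
		if (c && (!c->connected || !get_unless_zero(c)))
			c = NULL;	/* dead or dying: treat as absent */
		pthread_mutex_unlock(&tbl_lock);
		return c;
	}

	int main(void)
	{
		struct conn c = { .refs = 1, .connected = true };

		table[3] = &c;
		printf("%s\n", conn_lookup(3) ? "found" : "miss");	/* found */
		atomic_store(&c.refs, 0);	/* release in progress */
		printf("%s\n", conn_lookup(3) ? "found" : "miss");	/* miss */
		return 0;
	}
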
diff --git a/net/tipc/server.h b/net/tipc/topsrv.h
index 64df7513cd70..c7ea71293748 100644
--- a/net/tipc/server.h
+++ b/net/tipc/topsrv.h
@@ -2,6 +2,7 @@
2 * net/tipc/server.h: Include file for TIPC server code 2 * net/tipc/server.h: Include file for TIPC server code
3 * 3 *
4 * Copyright (c) 2012-2013, Wind River Systems 4 * Copyright (c) 2012-2013, Wind River Systems
5 * Copyright (c) 2017, Ericsson AB
5 * All rights reserved. 6 * All rights reserved.
6 * 7 *
7 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -36,68 +37,18 @@
36#ifndef _TIPC_SERVER_H 37#ifndef _TIPC_SERVER_H
37#define _TIPC_SERVER_H 38#define _TIPC_SERVER_H
38 39
39#include <linux/idr.h> 40#include "core.h"
40#include <linux/tipc.h>
41#include <net/net_namespace.h>
42 41
43#define TIPC_SERVER_NAME_LEN 32 42#define TIPC_SERVER_NAME_LEN 32
44#define TIPC_SUB_CLUSTER_SCOPE 0x20 43#define TIPC_SUB_CLUSTER_SCOPE 0x20
45#define TIPC_SUB_NODE_SCOPE 0x40 44#define TIPC_SUB_NODE_SCOPE 0x40
46#define TIPC_SUB_NO_STATUS 0x80 45#define TIPC_SUB_NO_STATUS 0x80
47 46
48/** 47void tipc_topsrv_queue_evt(struct net *net, int conid,
49 * struct tipc_server - TIPC server structure 48 u32 event, struct tipc_event *evt);
50 * @conn_idr: identifier set of connection
51 * @idr_lock: protect the connection identifier set
52 * @idr_in_use: amount of allocated identifier entry
53 * @net: network namspace instance
54 * @rcvbuf_cache: memory cache of server receive buffer
55 * @rcv_wq: receive workqueue
56 * @send_wq: send workqueue
57 * @max_rcvbuf_size: maximum permitted receive message length
58 * @tipc_conn_new: callback will be called when new connection is incoming
59 * @tipc_conn_release: callback will be called before releasing the connection
60 * @tipc_conn_recvmsg: callback will be called when message arrives
61 * @saddr: TIPC server address
62 * @name: server name
63 * @imp: message importance
64 * @type: socket type
65 */
66struct tipc_server {
67 struct idr conn_idr;
68 spinlock_t idr_lock;
69 int idr_in_use;
70 struct net *net;
71 struct kmem_cache *rcvbuf_cache;
72 struct workqueue_struct *rcv_wq;
73 struct workqueue_struct *send_wq;
74 int max_rcvbuf_size;
75 void *(*tipc_conn_new)(int conid);
76 void (*tipc_conn_release)(int conid, void *usr_data);
77 int (*tipc_conn_recvmsg)(struct net *net, int conid,
78 struct sockaddr_tipc *addr, void *usr_data,
79 void *buf, size_t len);
80 struct sockaddr_tipc *saddr;
81 char name[TIPC_SERVER_NAME_LEN];
82 int imp;
83 int type;
84};
85
86int tipc_conn_sendmsg(struct tipc_server *s, int conid,
87 struct sockaddr_tipc *addr, void *data, size_t len);
88 49
89bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, 50bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
90 u32 upper, u32 filter, int *conid); 51 u32 upper, u32 filter, int *conid);
91void tipc_topsrv_kern_unsubscr(struct net *net, int conid); 52void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
92 53
93/**
94 * tipc_conn_terminate - terminate connection with server
95 *
96 * Note: Must call it in process context since it might sleep
97 */
98void tipc_conn_terminate(struct tipc_server *s, int conid);
99int tipc_server_start(struct tipc_server *s);
100
101void tipc_server_stop(struct tipc_server *s);
102
103#endif 54#endif
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b0d5fcea47e7..e9b4b53ab53e 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -308,8 +308,11 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
308 goto out; 308 goto out;
309 } 309 }
310 lock_sock(sk); 310 lock_sock(sk);
311 memcpy(crypto_info_aes_gcm_128->iv, ctx->iv, 311 memcpy(crypto_info_aes_gcm_128->iv,
312 ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
312 TLS_CIPHER_AES_GCM_128_IV_SIZE); 313 TLS_CIPHER_AES_GCM_128_IV_SIZE);
314 memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->rec_seq,
315 TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
313 release_sock(sk); 316 release_sock(sk);
314 if (copy_to_user(optval, 317 if (copy_to_user(optval,
315 crypto_info_aes_gcm_128, 318 crypto_info_aes_gcm_128,
@@ -375,7 +378,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
375 rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); 378 rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
376 if (rc) { 379 if (rc) {
377 rc = -EFAULT; 380 rc = -EFAULT;
378 goto out; 381 goto err_crypto_info;
379 } 382 }
380 383
381 /* check version */ 384 /* check version */
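
The do_tls_getsockopt_tx() fix above matches how the TLS layer stores AES-GCM-128
nonce material: ctx->iv holds the 4 salt bytes followed by the 8-byte explicit
IV, so the copy to userspace must start at the salt offset, and the record
sequence number must now be copied out as well. A small standalone illustration
of the offset, with the sizes assumed from the uapi TLS header:

	#include <stdio.h>
	#include <string.h>

	#define SALT_SIZE 4	/* TLS_CIPHER_AES_GCM_128_SALT_SIZE */
	#define IV_SIZE   8	/* TLS_CIPHER_AES_GCM_128_IV_SIZE */

	int main(void)
	{
		unsigned char ctx_iv[SALT_SIZE + IV_SIZE];
		unsigned char user_iv[IV_SIZE];
		int i;

		/* ctx->iv as the kernel stores it: salt, then explicit IV */
		for (i = 0; i < SALT_SIZE + IV_SIZE; i++)
			ctx_iv[i] = (unsigned char)i;

		/* The buggy copy started at offset 0, handing userspace the
		 * salt plus only half of the real IV; the fix skips the salt
		 */
		memcpy(user_iv, ctx_iv + SALT_SIZE, IV_SIZE);

		for (i = 0; i < IV_SIZE; i++)
			printf("%02x ", user_iv[i]);	/* 04 ... 0b */
		printf("\n");
		return 0;
	}
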
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d545e1d0dea2..bc2970a8e7f3 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -637,7 +637,7 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
637 int addr_len, int flags); 637 int addr_len, int flags);
638static int unix_socketpair(struct socket *, struct socket *); 638static int unix_socketpair(struct socket *, struct socket *);
639static int unix_accept(struct socket *, struct socket *, int, bool); 639static int unix_accept(struct socket *, struct socket *, int, bool);
640static int unix_getname(struct socket *, struct sockaddr *, int *, int); 640static int unix_getname(struct socket *, struct sockaddr *, int);
641static __poll_t unix_poll(struct file *, struct socket *, poll_table *); 641static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
642static __poll_t unix_dgram_poll(struct file *, struct socket *, 642static __poll_t unix_dgram_poll(struct file *, struct socket *,
643 poll_table *); 643 poll_table *);
@@ -1453,7 +1453,7 @@ out:
1453} 1453}
1454 1454
1455 1455
1456static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) 1456static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1457{ 1457{
1458 struct sock *sk = sock->sk; 1458 struct sock *sk = sock->sk;
1459 struct unix_sock *u; 1459 struct unix_sock *u;
@@ -1476,12 +1476,12 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_
1476 if (!u->addr) { 1476 if (!u->addr) {
1477 sunaddr->sun_family = AF_UNIX; 1477 sunaddr->sun_family = AF_UNIX;
1478 sunaddr->sun_path[0] = 0; 1478 sunaddr->sun_path[0] = 0;
1479 *uaddr_len = sizeof(short); 1479 err = sizeof(short);
1480 } else { 1480 } else {
1481 struct unix_address *addr = u->addr; 1481 struct unix_address *addr = u->addr;
1482 1482
1483 *uaddr_len = addr->len; 1483 err = addr->len;
1484 memcpy(sunaddr, addr->name, *uaddr_len); 1484 memcpy(sunaddr, addr->name, addr->len);
1485 } 1485 }
1486 unix_state_unlock(sk); 1486 unix_state_unlock(sk);
1487 sock_put(sk); 1487 sock_put(sk);
@@ -1825,7 +1825,7 @@ out:
1825} 1825}
1826 1826
1827/* We use paged skbs for stream sockets, and limit occupancy to 32768 1827/* We use paged skbs for stream sockets, and limit occupancy to 32768
1828 * bytes, and a minimun of a full page. 1828 * bytes, and a minimum of a full page.
1829 */ 1829 */
1830#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 1830#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1831 1831
@@ -2913,6 +2913,7 @@ static void __net_exit unix_net_exit(struct net *net)
2913static struct pernet_operations unix_net_ops = { 2913static struct pernet_operations unix_net_ops = {
2914 .init = unix_net_init, 2914 .init = unix_net_init,
2915 .exit = unix_net_exit, 2915 .exit = unix_net_exit,
2916 .async = true,
2916}; 2917};
2917 2918
2918static int __init af_unix_init(void) 2919static int __init af_unix_init(void)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index e0fc84daed94..aac9b8f6552e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -759,7 +759,7 @@ vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
759} 759}
760 760
761static int vsock_getname(struct socket *sock, 761static int vsock_getname(struct socket *sock,
762 struct sockaddr *addr, int *addr_len, int peer) 762 struct sockaddr *addr, int peer)
763{ 763{
764 int err; 764 int err;
765 struct sock *sk; 765 struct sock *sk;
@@ -794,7 +794,7 @@ static int vsock_getname(struct socket *sock,
794 */ 794 */
795 BUILD_BUG_ON(sizeof(*vm_addr) > 128); 795 BUILD_BUG_ON(sizeof(*vm_addr) > 128);
796 memcpy(addr, vm_addr, sizeof(*vm_addr)); 796 memcpy(addr, vm_addr, sizeof(*vm_addr));
797 *addr_len = sizeof(*vm_addr); 797 err = sizeof(*vm_addr);
798 798
799out: 799out:
800 release_sock(sk); 800 release_sock(sk);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index a6f3cac8c640..670aa229168a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1340,6 +1340,7 @@ static void __net_exit cfg80211_pernet_exit(struct net *net)
1340 1340
1341static struct pernet_operations cfg80211_pernet_ops = { 1341static struct pernet_operations cfg80211_pernet_ops = {
1342 .exit = cfg80211_pernet_exit, 1342 .exit = cfg80211_pernet_exit,
1343 .async = true,
1343}; 1344};
1344 1345
1345static int __init cfg80211_init(void) 1346static int __init cfg80211_init(void)
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 9efbfc753347..bc7064486b15 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -390,6 +390,7 @@ static void __net_exit wext_pernet_exit(struct net *net)
390static struct pernet_operations wext_pernet_ops = { 390static struct pernet_operations wext_pernet_ops = {
391 .init = wext_pernet_init, 391 .init = wext_pernet_init,
392 .exit = wext_pernet_exit, 392 .exit = wext_pernet_exit,
393 .async = true,
393}; 394};
394 395
395static int __init wireless_nlevent_init(void) 396static int __init wireless_nlevent_init(void)
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 562cc11131f6..d49aa79b7997 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -896,7 +896,7 @@ out:
896} 896}
897 897
898static int x25_getname(struct socket *sock, struct sockaddr *uaddr, 898static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
899 int *uaddr_len, int peer) 899 int peer)
900{ 900{
901 struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)uaddr; 901 struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)uaddr;
902 struct sock *sk = sock->sk; 902 struct sock *sk = sock->sk;
@@ -913,7 +913,7 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
913 sx25->sx25_addr = x25->source_addr; 913 sx25->sx25_addr = x25->source_addr;
914 914
915 sx25->sx25_family = AF_X25; 915 sx25->sx25_family = AF_X25;
916 *uaddr_len = sizeof(*sx25); 916 rc = sizeof(*sx25);
917 917
918out: 918out:
919 return rc; 919 return rc;
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index db0b1315d577..9c214ec681ac 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -335,8 +335,7 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q,
335 } 335 }
336 } 336 }
337 337
338 pr_debug("invalid PLP frame %02X %02X %02X\n", 338 pr_debug("invalid PLP frame %3ph\n", frame);
339 frame[0], frame[1], frame[2]);
340 339
341 return X25_ILLEGAL; 340 return X25_ILLEGAL;
342} 341}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7a23078132cf..77d9d1ab05ce 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2982,6 +2982,7 @@ static void __net_exit xfrm_net_exit(struct net *net)
2982static struct pernet_operations __net_initdata xfrm_net_ops = { 2982static struct pernet_operations __net_initdata xfrm_net_ops = {
2983 .init = xfrm_net_init, 2983 .init = xfrm_net_init,
2984 .exit = xfrm_net_exit, 2984 .exit = xfrm_net_exit,
2985 .async = true,
2985}; 2986};
2986 2987
2987void __init xfrm_init(void) 2988void __init xfrm_init(void)
diff --git a/security/tomoyo/network.c b/security/tomoyo/network.c
index cd6932e5225c..9094f4b3b367 100644
--- a/security/tomoyo/network.c
+++ b/security/tomoyo/network.c
@@ -655,10 +655,11 @@ int tomoyo_socket_listen_permission(struct socket *sock)
655 return 0; 655 return 0;
656 { 656 {
657 const int error = sock->ops->getname(sock, (struct sockaddr *) 657 const int error = sock->ops->getname(sock, (struct sockaddr *)
658 &addr, &addr_len, 0); 658 &addr, 0);
659 659
660 if (error) 660 if (error < 0)
661 return error; 661 return error;
662 addr_len = error;
662 } 663 }
663 address.protocol = type; 664 address.protocol = type;
664 address.operation = TOMOYO_NETWORK_LISTEN; 665 address.operation = TOMOYO_NETWORK_LISTEN;
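
The af_unix, vsock, x25 and tomoyo hunks above all follow the same tree-wide API
change: getname() no longer writes the address length through an int pointer but
returns it directly, with negative values reserved for errors, which is exactly
what the updated tomoyo caller now checks for. A minimal sketch of a caller
ported to the new convention (every name below is a stand-in, not a kernel API):

	#include <stdio.h>
	#include <string.h>

	struct addr_stub { char data[128]; };

	/* New-style getname(): returns the address length on success,
	 * a negative error code on failure
	 */
	static int getname_new(struct addr_stub *addr, int peer)
	{
		const char name[] = "node<1.1.1>";

		if (peer)
			return -107;	/* e.g. -ENOTCONN */
		memcpy(addr->data, name, sizeof(name));
		return (int)sizeof(name);
	}

	int main(void)
	{
		struct addr_stub addr;
		int len = getname_new(&addr, 0);

		if (len < 0) {	/* error path, as in the tomoyo hunk */
			printf("getname failed: %d\n", len);
			return 1;
		}
		printf("addr (%d bytes): %s\n", len, addr.data);
		return 0;
	}
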
diff --git a/sound/ac97/Kconfig b/sound/ac97/Kconfig
index f8a64e15e5bf..baa5f8ef89d2 100644
--- a/sound/ac97/Kconfig
+++ b/sound/ac97/Kconfig
@@ -5,7 +5,6 @@
5 5
6config AC97_BUS_NEW 6config AC97_BUS_NEW
7 tristate 7 tristate
8 select AC97
9 help 8 help
10 This is the new AC97 bus type, successor of AC97_BUS. The ported 9 This is the new AC97 bus type, successor of AC97_BUS. The ported
11 drivers which benefit from the AC97 automatic probing should "select" 10 drivers which benefit from the AC97 automatic probing should "select"
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
index 60db32785f62..04d4db44fae5 100644
--- a/sound/core/seq/seq_clientmgr.c
+++ b/sound/core/seq/seq_clientmgr.c
@@ -1003,7 +1003,7 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf,
1003{ 1003{
1004 struct snd_seq_client *client = file->private_data; 1004 struct snd_seq_client *client = file->private_data;
1005 int written = 0, len; 1005 int written = 0, len;
1006 int err = -EINVAL; 1006 int err;
1007 struct snd_seq_event event; 1007 struct snd_seq_event event;
1008 1008
1009 if (!(snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_OUTPUT)) 1009 if (!(snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_OUTPUT))
@@ -1018,11 +1018,15 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf,
1018 1018
1019 /* allocate the pool now if the pool is not allocated yet */ 1019 /* allocate the pool now if the pool is not allocated yet */
1020 if (client->pool->size > 0 && !snd_seq_write_pool_allocated(client)) { 1020 if (client->pool->size > 0 && !snd_seq_write_pool_allocated(client)) {
1021 if (snd_seq_pool_init(client->pool) < 0) 1021 mutex_lock(&client->ioctl_mutex);
1022 err = snd_seq_pool_init(client->pool);
1023 mutex_unlock(&client->ioctl_mutex);
1024 if (err < 0)
1022 return -ENOMEM; 1025 return -ENOMEM;
1023 } 1026 }
1024 1027
1025 /* only process whole events */ 1028 /* only process whole events */
1029 err = -EINVAL;
1026 while (count >= sizeof(struct snd_seq_event)) { 1030 while (count >= sizeof(struct snd_seq_event)) {
1027 /* Read in the event header from the user */ 1031 /* Read in the event header from the user */
1028 len = sizeof(event); 1032 len = sizeof(event);
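
The snd_seq_write() hunk above closes a race in lazy pool allocation by running
snd_seq_pool_init() under the client's ioctl_mutex, so a concurrent ioctl can
neither re-trigger the initialization nor observe a half-built pool. The generic
shape of that pattern, sketched in plain C (assumed, not the driver's actual
code):

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct client {
		pthread_mutex_t ioctl_mutex;
		int *pool;		/* lazily allocated */
		size_t pool_size;
	};

	/* Lazy init done entirely under the mutex: at most one allocation,
	 * and no caller ever sees a half-initialized pool
	 */
	static int ensure_pool(struct client *c)
	{
		int err = 0;

		pthread_mutex_lock(&c->ioctl_mutex);
		if (!c->pool) {
			c->pool = calloc(c->pool_size, sizeof(*c->pool));
			if (!c->pool)
				err = -12;	/* e.g. -ENOMEM */
		}
		pthread_mutex_unlock(&c->ioctl_mutex);
		return err;
	}

	int main(void)
	{
		struct client c = { PTHREAD_MUTEX_INITIALIZER, NULL, 64 };

		printf("first:  %d\n", ensure_pool(&c));	/* allocates */
		printf("second: %d\n", ensure_pool(&c));	/* reuses */
		free(c.pool);
		return 0;
	}
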
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 23475888192b..ce28f7ce64e6 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -3465,6 +3465,19 @@ static void alc269_fixup_pincfg_no_hp_to_lineout(struct hda_codec *codec,
3465 spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; 3465 spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
3466} 3466}
3467 3467
3468static void alc269_fixup_pincfg_U7x7_headset_mic(struct hda_codec *codec,
3469 const struct hda_fixup *fix,
3470 int action)
3471{
3472 unsigned int cfg_headphone = snd_hda_codec_get_pincfg(codec, 0x21);
3473 unsigned int cfg_headset_mic = snd_hda_codec_get_pincfg(codec, 0x19);
3474
3475 if (cfg_headphone && cfg_headset_mic == 0x411111f0)
3476 snd_hda_codec_set_pincfg(codec, 0x19,
3477 (cfg_headphone & ~AC_DEFCFG_DEVICE) |
3478 (AC_JACK_MIC_IN << AC_DEFCFG_DEVICE_SHIFT));
3479}
3480
3468static void alc269_fixup_hweq(struct hda_codec *codec, 3481static void alc269_fixup_hweq(struct hda_codec *codec,
3469 const struct hda_fixup *fix, int action) 3482 const struct hda_fixup *fix, int action)
3470{ 3483{
@@ -4972,6 +4985,28 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec,
4972 } 4985 }
4973} 4986}
4974 4987
4988static void alc_fixup_tpt470_dock(struct hda_codec *codec,
4989 const struct hda_fixup *fix, int action)
4990{
4991 static const struct hda_pintbl pincfgs[] = {
4992 { 0x17, 0x21211010 }, /* dock headphone */
4993 { 0x19, 0x21a11010 }, /* dock mic */
4994 { }
4995 };
4996 struct alc_spec *spec = codec->spec;
4997
4998 if (action == HDA_FIXUP_ACT_PRE_PROBE) {
4999 spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
 5000 /* Enable DOCK headphone device */
5001 snd_hda_codec_write(codec, 0x17, 0,
5002 AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0);
 5003 /* Enable DOCK mic device */
5004 snd_hda_codec_write(codec, 0x19, 0,
5005 AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0);
5006 snd_hda_apply_pincfgs(codec, pincfgs);
5007 }
5008}
5009
4975static void alc_shutup_dell_xps13(struct hda_codec *codec) 5010static void alc_shutup_dell_xps13(struct hda_codec *codec)
4976{ 5011{
4977 struct alc_spec *spec = codec->spec; 5012 struct alc_spec *spec = codec->spec;
@@ -5351,6 +5386,7 @@ enum {
5351 ALC269_FIXUP_LIFEBOOK_EXTMIC, 5386 ALC269_FIXUP_LIFEBOOK_EXTMIC,
5352 ALC269_FIXUP_LIFEBOOK_HP_PIN, 5387 ALC269_FIXUP_LIFEBOOK_HP_PIN,
5353 ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT, 5388 ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT,
5389 ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC,
5354 ALC269_FIXUP_AMIC, 5390 ALC269_FIXUP_AMIC,
5355 ALC269_FIXUP_DMIC, 5391 ALC269_FIXUP_DMIC,
5356 ALC269VB_FIXUP_AMIC, 5392 ALC269VB_FIXUP_AMIC,
@@ -5446,6 +5482,7 @@ enum {
5446 ALC700_FIXUP_INTEL_REFERENCE, 5482 ALC700_FIXUP_INTEL_REFERENCE,
5447 ALC274_FIXUP_DELL_BIND_DACS, 5483 ALC274_FIXUP_DELL_BIND_DACS,
5448 ALC274_FIXUP_DELL_AIO_LINEOUT_VERB, 5484 ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
5485 ALC298_FIXUP_TPT470_DOCK,
5449}; 5486};
5450 5487
5451static const struct hda_fixup alc269_fixups[] = { 5488static const struct hda_fixup alc269_fixups[] = {
@@ -5556,6 +5593,10 @@ static const struct hda_fixup alc269_fixups[] = {
5556 .type = HDA_FIXUP_FUNC, 5593 .type = HDA_FIXUP_FUNC,
5557 .v.func = alc269_fixup_pincfg_no_hp_to_lineout, 5594 .v.func = alc269_fixup_pincfg_no_hp_to_lineout,
5558 }, 5595 },
5596 [ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC] = {
5597 .type = HDA_FIXUP_FUNC,
5598 .v.func = alc269_fixup_pincfg_U7x7_headset_mic,
5599 },
5559 [ALC269_FIXUP_AMIC] = { 5600 [ALC269_FIXUP_AMIC] = {
5560 .type = HDA_FIXUP_PINS, 5601 .type = HDA_FIXUP_PINS,
5561 .v.pins = (const struct hda_pintbl[]) { 5602 .v.pins = (const struct hda_pintbl[]) {
@@ -6271,6 +6312,12 @@ static const struct hda_fixup alc269_fixups[] = {
6271 .chained = true, 6312 .chained = true,
6272 .chain_id = ALC274_FIXUP_DELL_BIND_DACS 6313 .chain_id = ALC274_FIXUP_DELL_BIND_DACS
6273 }, 6314 },
6315 [ALC298_FIXUP_TPT470_DOCK] = {
6316 .type = HDA_FIXUP_FUNC,
6317 .v.func = alc_fixup_tpt470_dock,
6318 .chained = true,
6319 .chain_id = ALC293_FIXUP_LENOVO_SPK_NOISE
6320 },
6274}; 6321};
6275 6322
6276static const struct snd_pci_quirk alc269_fixup_tbl[] = { 6323static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6321,6 +6368,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
6321 SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), 6368 SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
6322 SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), 6369 SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
6323 SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), 6370 SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
6371 SND_PCI_QUIRK(0x1028, 0x084b, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
6372 SND_PCI_QUIRK(0x1028, 0x084e, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
6324 SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), 6373 SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
6325 SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), 6374 SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
6326 SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), 6375 SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -6422,6 +6471,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
6422 SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT), 6471 SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT),
6423 SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN), 6472 SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN),
6424 SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN), 6473 SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN),
6474 SND_PCI_QUIRK(0x10cf, 0x1629, "Lifebook U7x7", ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC),
6425 SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), 6475 SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
6426 SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE), 6476 SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE),
6427 SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), 6477 SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
@@ -6450,8 +6500,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x222d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x222e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460),
 	SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460),
+	SND_PCI_QUIRK(0x17aa, 0x2245, "Thinkpad T470", ALC298_FIXUP_TPT470_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x2246, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x2247, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x224b, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
@@ -6472,7 +6530,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x5050, "Thinkpad T560p", ALC292_FIXUP_TPT460),
 	SND_PCI_QUIRK(0x17aa, 0x5051, "Thinkpad L460", ALC292_FIXUP_TPT460),
 	SND_PCI_QUIRK(0x17aa, 0x5053, "Thinkpad T460", ALC292_FIXUP_TPT460),
+	SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+	SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),
 	SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
 	SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */
@@ -6735,6 +6798,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		{0x14, 0x90170110},
 		{0x21, 0x02211020}),
 	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+		{0x12, 0x90a60130},
+		{0x14, 0x90170110},
+		{0x14, 0x01011020},
+		{0x21, 0x0221101f}),
+	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC256_STANDARD_PINS),
 	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC,
 		{0x14, 0x90170110},
@@ -6803,6 +6871,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		{0x12, 0x90a60120},
 		{0x14, 0x90170110},
 		{0x21, 0x0321101f}),
+	SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+		{0x12, 0xb7a60130},
+		{0x14, 0x90170110},
+		{0x21, 0x04211020}),
 	SND_HDA_PIN_QUIRK(0x10ec0290, 0x103c, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1,
 		ALC290_STANDARD_PINS,
 		{0x15, 0x04211040},
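
The two tables patched above work together: alc269_fixups[] holds the fixup descriptors, and alc269_fixup_tbl[] maps PCI subsystem vendor/device IDs onto them. As a rough illustration of how such a subsystem-ID table is scanned, here is a minimal userspace sketch; the struct and function names are invented for illustration, not the ALSA internals:

#include <stdio.h>

/* Illustrative stand-in for a quirk entry: a PCI subsystem
 * vendor/device pair mapped to a fixup id. Not the real structures. */
struct ssid_quirk {
	unsigned short subvendor;
	unsigned short subdevice;
	const char *name;
	int fixup_id;
};

enum { FIXUP_TPT440_DOCK = 1, FIXUP_TPT470_DOCK = 2 };

static const struct ssid_quirk quirks[] = {
	{ 0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", FIXUP_TPT440_DOCK },
	{ 0x17aa, 0x2245, "Thinkpad T470", FIXUP_TPT470_DOCK },
	{ 0 }
};

/* First match wins, as in a linear quirk-table scan. */
static const struct ssid_quirk *ssid_lookup(unsigned short sv, unsigned short sd)
{
	const struct ssid_quirk *q;

	for (q = quirks; q->subvendor; q++)
		if (q->subvendor == sv && q->subdevice == sd)
			return q;
	return NULL;
}

int main(void)
{
	const struct ssid_quirk *q = ssid_lookup(0x17aa, 0x2245);

	if (q)
		printf("%04x:%04x (%s) -> fixup %d\n",
		       q->subvendor, q->subdevice, q->name, q->fixup_id);
	return 0;
}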
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 9afb8ab524c7..06b22624ab7a 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -347,17 +347,20 @@ static int get_ctl_value_v2(struct usb_mixer_elem_info *cval, int request,
 		    int validx, int *value_ret)
 {
 	struct snd_usb_audio *chip = cval->head.mixer->chip;
-	unsigned char buf[4 + 3 * sizeof(__u32)]; /* enough space for one range */
+	/* enough space for one range */
+	unsigned char buf[sizeof(__u16) + 3 * sizeof(__u32)];
 	unsigned char *val;
-	int idx = 0, ret, size;
+	int idx = 0, ret, val_size, size;
 	__u8 bRequest;
 
+	val_size = uac2_ctl_value_size(cval->val_type);
+
 	if (request == UAC_GET_CUR) {
 		bRequest = UAC2_CS_CUR;
-		size = uac2_ctl_value_size(cval->val_type);
+		size = val_size;
 	} else {
 		bRequest = UAC2_CS_RANGE;
-		size = sizeof(buf);
+		size = sizeof(__u16) + 3 * val_size;
 	}
 
 	memset(buf, 0, sizeof(buf));
@@ -390,16 +393,17 @@ error:
 		val = buf + sizeof(__u16);
 		break;
 	case UAC_GET_MAX:
-		val = buf + sizeof(__u16) * 2;
+		val = buf + sizeof(__u16) + val_size;
 		break;
 	case UAC_GET_RES:
-		val = buf + sizeof(__u16) * 3;
+		val = buf + sizeof(__u16) + val_size * 2;
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	*value_ret = convert_signed_value(cval, snd_usb_combine_bytes(val, sizeof(__u16)));
+	*value_ret = convert_signed_value(cval,
+					  snd_usb_combine_bytes(val, val_size));
 
 	return 0;
 }
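
The mixer fix above corrects the offsets used to parse a UAC2 RANGE reply: the block is a 16-bit wNumSubRanges followed by MIN/MAX/RES fields that are each val_size bytes wide, not fixed 16-bit values. A standalone sketch of that layout, illustrative only and using uintN_t in place of the kernel's __uN types:

#include <stdint.h>
#include <stdio.h>

/* Little-endian combine of val_size bytes, in the spirit of
 * snd_usb_combine_bytes(); illustrative, not the kernel helper. */
static uint32_t get_field(const uint8_t *buf, size_t off, size_t val_size)
{
	uint32_t v = 0;
	size_t i;

	for (i = 0; i < val_size; i++)
		v |= (uint32_t)buf[off + i] << (8 * i);
	return v;
}

int main(void)
{
	/* One UAC2 RANGE block for a 32-bit control: wNumSubRanges (16 bit)
	 * followed by MIN, MAX, RES, each val_size bytes wide. */
	const size_t val_size = sizeof(uint32_t);
	const uint8_t buf[2 + 3 * 4] = {
		0x01, 0x00,             /* wNumSubRanges = 1 */
		0x64, 0x00, 0x00, 0x00, /* MIN = 100 */
		0x80, 0xbb, 0x00, 0x00, /* MAX = 48000 */
		0x01, 0x00, 0x00, 0x00, /* RES = 1 */
	};
	const size_t base = sizeof(uint16_t);

	printf("min=%u max=%u res=%u\n",
	       get_field(buf, base, val_size),
	       get_field(buf, base + val_size, val_size),
	       get_field(buf, base + 2 * val_size, val_size));
	return 0;
}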
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index b9c9a19f9588..3cbfae6604f9 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -357,6 +357,15 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs,
 
 	alts = &iface->altsetting[1];
 	goto add_sync_ep;
+	case USB_ID(0x1397, 0x0002):
+		ep = 0x81;
+		iface = usb_ifnum_to_if(dev, 1);
+
+		if (!iface || iface->num_altsetting == 0)
+			return -EINVAL;
+
+		alts = &iface->altsetting[1];
+		goto add_sync_ep;
 
 	}
 	if (attr == USB_ENDPOINT_SYNC_ASYNC &&
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index a66ef5777887..ea8f3de92fa4 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1363,8 +1363,11 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
 		return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 	break;
 
-	/* Amanero Combo384 USB interface with native DSD support */
-	case USB_ID(0x16d0, 0x071a):
+	/* Amanero Combo384 USB-based DACs with native DSD support */
+	case USB_ID(0x16d0, 0x071a): /* Amanero - Combo384 */
+	case USB_ID(0x2ab6, 0x0004): /* T+A DAC8DSD-V2.0, MP1000E-V2.0, MP2000R-V2.0, MP2500R-V2.0, MP3100HV-V2.0 */
+	case USB_ID(0x2ab6, 0x0005): /* T+A USB HD Audio 1 */
+	case USB_ID(0x2ab6, 0x0006): /* T+A USB HD Audio 2 */
 	if (fp->altsetting == 2) {
 		switch (le16_to_cpu(chip->dev->descriptor.bcdDevice)) {
 		case 0x199:
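
snd_usb_interface_dsd_format_quirks() keys these quirks off a packed vendor/product value so a plain switch can match devices. A small sketch of that matching pattern; the USB_ID() packing shown here, (vendor << 16) | product, is assumed from sound/usb usage and should be treated as illustrative:

#include <stdint.h>
#include <stdio.h>

/* Assumed packing of idVendor/idProduct into one value, mirroring the
 * USB_ID() convention used by the quirk tables. */
#define USB_ID(vendor, product) (((uint32_t)(vendor) << 16) | (product))

int main(void)
{
	uint32_t id = USB_ID(0x2ab6, 0x0004); /* T+A V2.0 DAC family */

	switch (id) {
	case USB_ID(0x16d0, 0x071a): /* Amanero Combo384 */
	case USB_ID(0x2ab6, 0x0004): /* T+A V2.0 DACs */
		printf("0x%08x: native DSD candidate\n", id);
		break;
	default:
		printf("0x%08x: no DSD quirk\n", id);
	}
	return 0;
}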
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 637b7263cb86..833ed9a16adf 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_TIDR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
 #define KVM_REG_PPC_PSSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
 
+#define KVM_REG_PPC_DEC_EXPIRY	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
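
KVM "one reg" IDs such as KVM_REG_PPC_DEC_EXPIRY above are 64-bit values composed from an architecture field, a size field, and a register index. A sketch that decodes the fields; the constants below mirror the uapi encoding as understood here, so treat them as assumptions rather than authoritative:

#include <stdint.h>
#include <stdio.h>

/* Architecture in the top bits, log2 of the register size in bits
 * 52..55, register index in the low bits (assumed uapi layout). */
#define KVM_REG_PPC       0x1000000000000000ULL
#define KVM_REG_SIZE_U64  0x0030000000000000ULL

#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)

int main(void)
{
	uint64_t id = KVM_REG_PPC_DEC_EXPIRY;

	printf("id=0x%016llx arch=0x%llx size=2^%llu bytes index=0x%llx\n",
	       (unsigned long long)id,
	       (unsigned long long)(id >> 56),         /* architecture field */
	       (unsigned long long)((id >> 52) & 0xf), /* log2 of byte size */
	       (unsigned long long)(id & 0xffff));
	return 0;
}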
diff --git a/tools/arch/s390/include/uapi/asm/unistd.h b/tools/arch/s390/include/uapi/asm/unistd.h
deleted file mode 100644
index 725120939051..000000000000
--- a/tools/arch/s390/include/uapi/asm/unistd.h
+++ /dev/null
@@ -1,412 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2/*
3 * S390 version
4 *
5 * Derived from "include/asm-i386/unistd.h"
6 */
7
8#ifndef _UAPI_ASM_S390_UNISTD_H_
9#define _UAPI_ASM_S390_UNISTD_H_
10
11/*
12 * This file contains the system call numbers.
13 */
14
15#define __NR_exit 1
16#define __NR_fork 2
17#define __NR_read 3
18#define __NR_write 4
19#define __NR_open 5
20#define __NR_close 6
21#define __NR_restart_syscall 7
22#define __NR_creat 8
23#define __NR_link 9
24#define __NR_unlink 10
25#define __NR_execve 11
26#define __NR_chdir 12
27#define __NR_mknod 14
28#define __NR_chmod 15
29#define __NR_lseek 19
30#define __NR_getpid 20
31#define __NR_mount 21
32#define __NR_umount 22
33#define __NR_ptrace 26
34#define __NR_alarm 27
35#define __NR_pause 29
36#define __NR_utime 30
37#define __NR_access 33
38#define __NR_nice 34
39#define __NR_sync 36
40#define __NR_kill 37
41#define __NR_rename 38
42#define __NR_mkdir 39
43#define __NR_rmdir 40
44#define __NR_dup 41
45#define __NR_pipe 42
46#define __NR_times 43
47#define __NR_brk 45
48#define __NR_signal 48
49#define __NR_acct 51
50#define __NR_umount2 52
51#define __NR_ioctl 54
52#define __NR_fcntl 55
53#define __NR_setpgid 57
54#define __NR_umask 60
55#define __NR_chroot 61
56#define __NR_ustat 62
57#define __NR_dup2 63
58#define __NR_getppid 64
59#define __NR_getpgrp 65
60#define __NR_setsid 66
61#define __NR_sigaction 67
62#define __NR_sigsuspend 72
63#define __NR_sigpending 73
64#define __NR_sethostname 74
65#define __NR_setrlimit 75
66#define __NR_getrusage 77
67#define __NR_gettimeofday 78
68#define __NR_settimeofday 79
69#define __NR_symlink 83
70#define __NR_readlink 85
71#define __NR_uselib 86
72#define __NR_swapon 87
73#define __NR_reboot 88
74#define __NR_readdir 89
75#define __NR_mmap 90
76#define __NR_munmap 91
77#define __NR_truncate 92
78#define __NR_ftruncate 93
79#define __NR_fchmod 94
80#define __NR_getpriority 96
81#define __NR_setpriority 97
82#define __NR_statfs 99
83#define __NR_fstatfs 100
84#define __NR_socketcall 102
85#define __NR_syslog 103
86#define __NR_setitimer 104
87#define __NR_getitimer 105
88#define __NR_stat 106
89#define __NR_lstat 107
90#define __NR_fstat 108
91#define __NR_lookup_dcookie 110
92#define __NR_vhangup 111
93#define __NR_idle 112
94#define __NR_wait4 114
95#define __NR_swapoff 115
96#define __NR_sysinfo 116
97#define __NR_ipc 117
98#define __NR_fsync 118
99#define __NR_sigreturn 119
100#define __NR_clone 120
101#define __NR_setdomainname 121
102#define __NR_uname 122
103#define __NR_adjtimex 124
104#define __NR_mprotect 125
105#define __NR_sigprocmask 126
106#define __NR_create_module 127
107#define __NR_init_module 128
108#define __NR_delete_module 129
109#define __NR_get_kernel_syms 130
110#define __NR_quotactl 131
111#define __NR_getpgid 132
112#define __NR_fchdir 133
113#define __NR_bdflush 134
114#define __NR_sysfs 135
115#define __NR_personality 136
116#define __NR_afs_syscall 137 /* Syscall for Andrew File System */
117#define __NR_getdents 141
118#define __NR_flock 143
119#define __NR_msync 144
120#define __NR_readv 145
121#define __NR_writev 146
122#define __NR_getsid 147
123#define __NR_fdatasync 148
124#define __NR__sysctl 149
125#define __NR_mlock 150
126#define __NR_munlock 151
127#define __NR_mlockall 152
128#define __NR_munlockall 153
129#define __NR_sched_setparam 154
130#define __NR_sched_getparam 155
131#define __NR_sched_setscheduler 156
132#define __NR_sched_getscheduler 157
133#define __NR_sched_yield 158
134#define __NR_sched_get_priority_max 159
135#define __NR_sched_get_priority_min 160
136#define __NR_sched_rr_get_interval 161
137#define __NR_nanosleep 162
138#define __NR_mremap 163
139#define __NR_query_module 167
140#define __NR_poll 168
141#define __NR_nfsservctl 169
142#define __NR_prctl 172
143#define __NR_rt_sigreturn 173
144#define __NR_rt_sigaction 174
145#define __NR_rt_sigprocmask 175
146#define __NR_rt_sigpending 176
147#define __NR_rt_sigtimedwait 177
148#define __NR_rt_sigqueueinfo 178
149#define __NR_rt_sigsuspend 179
150#define __NR_pread64 180
151#define __NR_pwrite64 181
152#define __NR_getcwd 183
153#define __NR_capget 184
154#define __NR_capset 185
155#define __NR_sigaltstack 186
156#define __NR_sendfile 187
157#define __NR_getpmsg 188
158#define __NR_putpmsg 189
159#define __NR_vfork 190
160#define __NR_pivot_root 217
161#define __NR_mincore 218
162#define __NR_madvise 219
163#define __NR_getdents64 220
164#define __NR_readahead 222
165#define __NR_setxattr 224
166#define __NR_lsetxattr 225
167#define __NR_fsetxattr 226
168#define __NR_getxattr 227
169#define __NR_lgetxattr 228
170#define __NR_fgetxattr 229
171#define __NR_listxattr 230
172#define __NR_llistxattr 231
173#define __NR_flistxattr 232
174#define __NR_removexattr 233
175#define __NR_lremovexattr 234
176#define __NR_fremovexattr 235
177#define __NR_gettid 236
178#define __NR_tkill 237
179#define __NR_futex 238
180#define __NR_sched_setaffinity 239
181#define __NR_sched_getaffinity 240
182#define __NR_tgkill 241
183/* Number 242 is reserved for tux */
184#define __NR_io_setup 243
185#define __NR_io_destroy 244
186#define __NR_io_getevents 245
187#define __NR_io_submit 246
188#define __NR_io_cancel 247
189#define __NR_exit_group 248
190#define __NR_epoll_create 249
191#define __NR_epoll_ctl 250
192#define __NR_epoll_wait 251
193#define __NR_set_tid_address 252
194#define __NR_fadvise64 253
195#define __NR_timer_create 254
196#define __NR_timer_settime 255
197#define __NR_timer_gettime 256
198#define __NR_timer_getoverrun 257
199#define __NR_timer_delete 258
200#define __NR_clock_settime 259
201#define __NR_clock_gettime 260
202#define __NR_clock_getres 261
203#define __NR_clock_nanosleep 262
204/* Number 263 is reserved for vserver */
205#define __NR_statfs64 265
206#define __NR_fstatfs64 266
207#define __NR_remap_file_pages 267
208#define __NR_mbind 268
209#define __NR_get_mempolicy 269
210#define __NR_set_mempolicy 270
211#define __NR_mq_open 271
212#define __NR_mq_unlink 272
213#define __NR_mq_timedsend 273
214#define __NR_mq_timedreceive 274
215#define __NR_mq_notify 275
216#define __NR_mq_getsetattr 276
217#define __NR_kexec_load 277
218#define __NR_add_key 278
219#define __NR_request_key 279
220#define __NR_keyctl 280
221#define __NR_waitid 281
222#define __NR_ioprio_set 282
223#define __NR_ioprio_get 283
224#define __NR_inotify_init 284
225#define __NR_inotify_add_watch 285
226#define __NR_inotify_rm_watch 286
227#define __NR_migrate_pages 287
228#define __NR_openat 288
229#define __NR_mkdirat 289
230#define __NR_mknodat 290
231#define __NR_fchownat 291
232#define __NR_futimesat 292
233#define __NR_unlinkat 294
234#define __NR_renameat 295
235#define __NR_linkat 296
236#define __NR_symlinkat 297
237#define __NR_readlinkat 298
238#define __NR_fchmodat 299
239#define __NR_faccessat 300
240#define __NR_pselect6 301
241#define __NR_ppoll 302
242#define __NR_unshare 303
243#define __NR_set_robust_list 304
244#define __NR_get_robust_list 305
245#define __NR_splice 306
246#define __NR_sync_file_range 307
247#define __NR_tee 308
248#define __NR_vmsplice 309
249#define __NR_move_pages 310
250#define __NR_getcpu 311
251#define __NR_epoll_pwait 312
252#define __NR_utimes 313
253#define __NR_fallocate 314
254#define __NR_utimensat 315
255#define __NR_signalfd 316
256#define __NR_timerfd 317
257#define __NR_eventfd 318
258#define __NR_timerfd_create 319
259#define __NR_timerfd_settime 320
260#define __NR_timerfd_gettime 321
261#define __NR_signalfd4 322
262#define __NR_eventfd2 323
263#define __NR_inotify_init1 324
264#define __NR_pipe2 325
265#define __NR_dup3 326
266#define __NR_epoll_create1 327
267#define __NR_preadv 328
268#define __NR_pwritev 329
269#define __NR_rt_tgsigqueueinfo 330
270#define __NR_perf_event_open 331
271#define __NR_fanotify_init 332
272#define __NR_fanotify_mark 333
273#define __NR_prlimit64 334
274#define __NR_name_to_handle_at 335
275#define __NR_open_by_handle_at 336
276#define __NR_clock_adjtime 337
277#define __NR_syncfs 338
278#define __NR_setns 339
279#define __NR_process_vm_readv 340
280#define __NR_process_vm_writev 341
281#define __NR_s390_runtime_instr 342
282#define __NR_kcmp 343
283#define __NR_finit_module 344
284#define __NR_sched_setattr 345
285#define __NR_sched_getattr 346
286#define __NR_renameat2 347
287#define __NR_seccomp 348
288#define __NR_getrandom 349
289#define __NR_memfd_create 350
290#define __NR_bpf 351
291#define __NR_s390_pci_mmio_write 352
292#define __NR_s390_pci_mmio_read 353
293#define __NR_execveat 354
294#define __NR_userfaultfd 355
295#define __NR_membarrier 356
296#define __NR_recvmmsg 357
297#define __NR_sendmmsg 358
298#define __NR_socket 359
299#define __NR_socketpair 360
300#define __NR_bind 361
301#define __NR_connect 362
302#define __NR_listen 363
303#define __NR_accept4 364
304#define __NR_getsockopt 365
305#define __NR_setsockopt 366
306#define __NR_getsockname 367
307#define __NR_getpeername 368
308#define __NR_sendto 369
309#define __NR_sendmsg 370
310#define __NR_recvfrom 371
311#define __NR_recvmsg 372
312#define __NR_shutdown 373
313#define __NR_mlock2 374
314#define __NR_copy_file_range 375
315#define __NR_preadv2 376
316#define __NR_pwritev2 377
317#define __NR_s390_guarded_storage 378
318#define __NR_statx 379
319#define __NR_s390_sthyi 380
320#define NR_syscalls 381
321
322/*
323 * There are some system calls that are not present on 64 bit, some
324 * have a different name although they do the same (e.g. __NR_chown32
325 * is __NR_chown on 64 bit).
326 */
327#ifndef __s390x__
328
329#define __NR_time 13
330#define __NR_lchown 16
331#define __NR_setuid 23
332#define __NR_getuid 24
333#define __NR_stime 25
334#define __NR_setgid 46
335#define __NR_getgid 47
336#define __NR_geteuid 49
337#define __NR_getegid 50
338#define __NR_setreuid 70
339#define __NR_setregid 71
340#define __NR_getrlimit 76
341#define __NR_getgroups 80
342#define __NR_setgroups 81
343#define __NR_fchown 95
344#define __NR_ioperm 101
345#define __NR_setfsuid 138
346#define __NR_setfsgid 139
347#define __NR__llseek 140
348#define __NR__newselect 142
349#define __NR_setresuid 164
350#define __NR_getresuid 165
351#define __NR_setresgid 170
352#define __NR_getresgid 171
353#define __NR_chown 182
354#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */
355#define __NR_mmap2 192
356#define __NR_truncate64 193
357#define __NR_ftruncate64 194
358#define __NR_stat64 195
359#define __NR_lstat64 196
360#define __NR_fstat64 197
361#define __NR_lchown32 198
362#define __NR_getuid32 199
363#define __NR_getgid32 200
364#define __NR_geteuid32 201
365#define __NR_getegid32 202
366#define __NR_setreuid32 203
367#define __NR_setregid32 204
368#define __NR_getgroups32 205
369#define __NR_setgroups32 206
370#define __NR_fchown32 207
371#define __NR_setresuid32 208
372#define __NR_getresuid32 209
373#define __NR_setresgid32 210
374#define __NR_getresgid32 211
375#define __NR_chown32 212
376#define __NR_setuid32 213
377#define __NR_setgid32 214
378#define __NR_setfsuid32 215
379#define __NR_setfsgid32 216
380#define __NR_fcntl64 221
381#define __NR_sendfile64 223
382#define __NR_fadvise64_64 264
383#define __NR_fstatat64 293
384
385#else
386
387#define __NR_select 142
388#define __NR_getrlimit 191 /* SuS compliant getrlimit */
389#define __NR_lchown 198
390#define __NR_getuid 199
391#define __NR_getgid 200
392#define __NR_geteuid 201
393#define __NR_getegid 202
394#define __NR_setreuid 203
395#define __NR_setregid 204
396#define __NR_getgroups 205
397#define __NR_setgroups 206
398#define __NR_fchown 207
399#define __NR_setresuid 208
400#define __NR_getresuid 209
401#define __NR_setresgid 210
402#define __NR_getresgid 211
403#define __NR_chown 212
404#define __NR_setuid 213
405#define __NR_setgid 214
406#define __NR_setfsuid 215
407#define __NR_setfsgid 216
408#define __NR_newfstatat 293
409
410#endif
411
412#endif /* _UAPI_ASM_S390_UNISTD_H_ */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 1d9199e1c2ad..0dfe4d3f74e2 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -210,6 +210,7 @@
 
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
 #define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* "" Fill RSB on context switches */
+#define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
 
 #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
 
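
x86 feature flags encode a (word, bit) pair as word*32+bit, so the new X86_FEATURE_SEV is word 7, bit 20 of the per-CPU capability bitmap. A minimal sketch of the indexing; the array size and helper name here are illustrative:

#include <stdio.h>

/* Feature numbers encode (word * 32 + bit); the capability bitmap is
 * an array of 32-bit words indexed that way. */
#define X86_FEATURE_SEV (7 * 32 + 20)

static int test_feature(const unsigned int *caps, int feature)
{
	return (caps[feature / 32] >> (feature % 32)) & 1;
}

int main(void)
{
	unsigned int caps[19] = { 0 };  /* word count illustrative */

	caps[7] |= 1u << 20;            /* pretend SEV was detected */
	printf("SEV: word %d bit %d -> %d\n",
	       X86_FEATURE_SEV / 32, X86_FEATURE_SEV % 32,
	       test_feature(caps, X86_FEATURE_SEV));
	return 0;
}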
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index ac3c6503ca27..536ee4febd74 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -86,6 +86,62 @@ enum i915_mocs_table_index {
 	I915_MOCS_CACHED,
 };
 
+/*
+ * Different engines serve different roles, and there may be more than one
+ * engine serving each role. enum drm_i915_gem_engine_class provides a
+ * classification of the role of the engine, which may be used when requesting
+ * operations to be performed on a certain subset of engines, or for providing
+ * information about that group.
+ */
+enum drm_i915_gem_engine_class {
+	I915_ENGINE_CLASS_RENDER	= 0,
+	I915_ENGINE_CLASS_COPY		= 1,
+	I915_ENGINE_CLASS_VIDEO		= 2,
+	I915_ENGINE_CLASS_VIDEO_ENHANCE	= 3,
+
+	I915_ENGINE_CLASS_INVALID	= -1
+};
+
+/**
+ * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
+ *
+ */
+
+enum drm_i915_pmu_engine_sample {
+	I915_SAMPLE_BUSY = 0,
+	I915_SAMPLE_WAIT = 1,
+	I915_SAMPLE_SEMA = 2
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+	(I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+	((class) << I915_PMU_CLASS_SHIFT | \
+	 (instance) << I915_PMU_SAMPLE_BITS | \
+	 (sample))
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY	__I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(1)
+#define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(2)
+#define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(3)
+
+#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use
@@ -450,6 +506,27 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
 
+/*
+ * Query whether every context (both per-file default and user created) is
+ * isolated (insofar as HW supports). If this parameter is not true, then
+ * freshly created contexts may inherit values from an existing context,
+ * rather than default HW values. If true, it also ensures (insofar as HW
+ * supports) that all state set by this context will not leak to any other
+ * context.
+ *
+ * As not every engine across every gen supports contexts, the returned
+ * value reports the support of context isolation for individual engines by
+ * returning a bitmask of each engine class set to true if that class supports
+ * isolation.
+ */
+#define I915_PARAM_HAS_CONTEXT_ISOLATION 50
+
+/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
+ * registers. This used to be fixed per platform but from CNL onwards, this
+ * might vary depending on the parts.
+ */
+#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
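
The new i915 PMU macros pack (engine class, instance, sample) into a perf event config value: sample takes 4 bits, instance the next 8, so the class lands at shift 12. A sketch that recomputes one such config from the macros shown above and decodes it back:

#include <stdio.h>

#define I915_PMU_SAMPLE_BITS          4
#define I915_PMU_SAMPLE_INSTANCE_BITS 8
#define I915_PMU_CLASS_SHIFT \
	(I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)

#define __I915_PMU_ENGINE(class, instance, sample) \
	((class) << I915_PMU_CLASS_SHIFT | \
	 (instance) << I915_PMU_SAMPLE_BITS | \
	 (sample))

enum { I915_SAMPLE_BUSY, I915_SAMPLE_WAIT, I915_SAMPLE_SEMA };
enum { CLASS_RENDER, CLASS_COPY, CLASS_VIDEO, CLASS_VIDEO_ENHANCE };

int main(void)
{
	/* config for "busy ticks of the second video engine" */
	unsigned int cfg = __I915_PMU_ENGINE(CLASS_VIDEO, 1, I915_SAMPLE_BUSY);

	printf("config=0x%x (class=%u instance=%u sample=%u)\n",
	       cfg, cfg >> 12, (cfg >> 4) & 0xff, cfg & 0xf);
	return 0;
}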
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 8616131e2c61..6d9447700e18 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -163,6 +163,7 @@ enum {
 	IFLA_IF_NETNSID,
 	IFLA_CARRIER_UP_COUNT,
 	IFLA_CARRIER_DOWN_COUNT,
+	IFLA_NEW_IFINDEX,
 	__IFLA_MAX
 };
 
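
Note that IFLA_NEW_IFINDEX is appended immediately before the __IFLA_MAX sentinel; inserting it any earlier would renumber existing attributes and break the netlink ABI. A toy enum showing the append-only pattern (names invented):

#include <stdio.h>

/* uapi enums grow only at the tail: a new attribute slots in just
 * before the __MAX sentinel so every published value keeps its number. */
enum {
	IFLA_EXAMPLE_A,    /* 0, fixed forever once published */
	IFLA_EXAMPLE_B,    /* 1 */
	IFLA_EXAMPLE_NEW,  /* 2, the appended attribute */
	__IFLA_EXAMPLE_MAX
};
#define IFLA_EXAMPLE_MAX (__IFLA_EXAMPLE_MAX - 1)

int main(void)
{
	printf("max attribute id = %d\n", IFLA_EXAMPLE_MAX);
	return 0;
}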
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 8fb90a0819c3..0fb5ef939732 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1362,6 +1362,96 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_S390_CMMA_MIGRATION */
 #define KVM_S390_GET_CMMA_BITS      _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
 #define KVM_S390_SET_CMMA_BITS      _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
+/* Memory Encryption Commands */
+#define KVM_MEMORY_ENCRYPT_OP      _IOWR(KVMIO, 0xba, unsigned long)
+
+struct kvm_enc_region {
+	__u64 addr;
+	__u64 size;
+};
+
+#define KVM_MEMORY_ENCRYPT_REG_REGION    _IOR(KVMIO, 0xbb, struct kvm_enc_region)
+#define KVM_MEMORY_ENCRYPT_UNREG_REGION  _IOR(KVMIO, 0xbc, struct kvm_enc_region)
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+	/* Guest initialization commands */
+	KVM_SEV_INIT = 0,
+	KVM_SEV_ES_INIT,
+	/* Guest launch commands */
+	KVM_SEV_LAUNCH_START,
+	KVM_SEV_LAUNCH_UPDATE_DATA,
+	KVM_SEV_LAUNCH_UPDATE_VMSA,
+	KVM_SEV_LAUNCH_SECRET,
+	KVM_SEV_LAUNCH_MEASURE,
+	KVM_SEV_LAUNCH_FINISH,
+	/* Guest migration commands (outgoing) */
+	KVM_SEV_SEND_START,
+	KVM_SEV_SEND_UPDATE_DATA,
+	KVM_SEV_SEND_UPDATE_VMSA,
+	KVM_SEV_SEND_FINISH,
+	/* Guest migration commands (incoming) */
+	KVM_SEV_RECEIVE_START,
+	KVM_SEV_RECEIVE_UPDATE_DATA,
+	KVM_SEV_RECEIVE_UPDATE_VMSA,
+	KVM_SEV_RECEIVE_FINISH,
+	/* Guest status and debug commands */
+	KVM_SEV_GUEST_STATUS,
+	KVM_SEV_DBG_DECRYPT,
+	KVM_SEV_DBG_ENCRYPT,
+	/* Guest certificates commands */
+	KVM_SEV_CERT_EXPORT,
+
+	KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+	__u32 id;
+	__u64 data;
+	__u32 error;
+	__u32 sev_fd;
+};
+
+struct kvm_sev_launch_start {
+	__u32 handle;
+	__u32 policy;
+	__u64 dh_uaddr;
+	__u32 dh_len;
+	__u64 session_uaddr;
+	__u32 session_len;
+};
+
+struct kvm_sev_launch_update_data {
+	__u64 uaddr;
+	__u32 len;
+};
+
+
+struct kvm_sev_launch_secret {
+	__u64 hdr_uaddr;
+	__u32 hdr_len;
+	__u64 guest_uaddr;
+	__u32 guest_len;
+	__u64 trans_uaddr;
+	__u32 trans_len;
+};
+
+struct kvm_sev_launch_measure {
+	__u64 uaddr;
+	__u32 len;
+};
+
+struct kvm_sev_guest_status {
+	__u32 handle;
+	__u32 policy;
+	__u32 state;
+};
+
+struct kvm_sev_dbg {
+	__u64 src_uaddr;
+	__u64 dst_uaddr;
+	__u32 len;
+};
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
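
The SEV uapi above is driven through KVM_MEMORY_ENCRYPT_OP with a struct kvm_sev_cmd whose data field points at the per-command payload. A hedged sketch of the calling convention only; the helper name is invented, the struct is redeclared locally for self-containment, and fd setup plus the full launch flow are out of scope:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

/* Local redeclaration of the uapi command envelope shown above. */
struct kvm_sev_cmd {
	uint32_t id;
	uint64_t data;     /* userspace pointer to the per-command struct */
	uint32_t error;
	uint32_t sev_fd;
};

static int sev_issue(int vm_fd, unsigned long req, uint32_t id,
		     void *data, uint32_t sev_fd)
{
	struct kvm_sev_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.id = id;
	cmd.data = (uintptr_t)data;
	cmd.sev_fd = sev_fd;
	if (ioctl(vm_fd, req, &cmd) < 0) {
		fprintf(stderr, "SEV cmd %u failed, fw error %u\n",
			id, cmd.error);
		return -1;
	}
	return 0;
}

A caller would chain KVM_SEV_LAUNCH_START, KVM_SEV_LAUNCH_UPDATE_DATA, KVM_SEV_LAUNCH_MEASURE and so on through this one entry point, passing the matching payload struct each time.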
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index b00b1896547e..a8cb69a26576 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -852,8 +852,14 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func,
  * This is a fairly uncommon pattern which is new for GCC 6. As of this
  * writing, there are 11 occurrences of it in the allmodconfig kernel.
  *
+ * As of GCC 7 there are quite a few more of these and the 'in between' code
+ * is significant. Esp. with KASAN enabled some of the code between the mov
+ * and jmpq uses .rodata itself, which can confuse things.
+ *
  * TODO: Once we have DWARF CFI and smarter instruction decoding logic,
  * ensure the same register is used in the mov and jump instructions.
+ *
+ * NOTE: RETPOLINE made it harder still to decode dynamic jumps.
  */
 static struct rela *find_switch_table(struct objtool_file *file,
 				      struct symbol *func,
@@ -875,12 +881,25 @@ static struct rela *find_switch_table(struct objtool_file *file,
 					    text_rela->addend + 4);
 		if (!rodata_rela)
 			return NULL;
+
 		file->ignore_unreachables = true;
 		return rodata_rela;
 	}
 
 	/* case 3 */
-	func_for_each_insn_continue_reverse(file, func, insn) {
+	/*
+	 * Backward search using the @first_jump_src links; these help avoid
+	 * much of the 'in between' code, which keeps us from getting confused
+	 * by it.
+	 */
+	for (insn = list_prev_entry(insn, list);
+
+	     &insn->list != &file->insn_list &&
+	     insn->sec == func->sec &&
+	     insn->offset >= func->offset;
+
+	     insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
+
 		if (insn->type == INSN_JUMP_DYNAMIC)
 			break;
 
@@ -910,14 +929,32 @@ static struct rela *find_switch_table(struct objtool_file *file,
 	return NULL;
 }
 
+
 static int add_func_switch_tables(struct objtool_file *file,
 				  struct symbol *func)
 {
-	struct instruction *insn, *prev_jump = NULL;
+	struct instruction *insn, *last = NULL, *prev_jump = NULL;
 	struct rela *rela, *prev_rela = NULL;
 	int ret;
 
 	func_for_each_insn(file, func, insn) {
+		if (!last)
+			last = insn;
+
+		/*
+		 * Store back-pointers for unconditional forward jumps such
+		 * that find_switch_table() can back-track using those and
+		 * avoid some potentially confusing code.
+		 */
+		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest &&
+		    insn->offset > last->offset &&
+		    insn->jump_dest->offset > insn->offset &&
+		    !insn->jump_dest->first_jump_src) {
+
+			insn->jump_dest->first_jump_src = insn;
+			last = insn->jump_dest;
+		}
+
 		if (insn->type != INSN_JUMP_DYNAMIC)
 			continue;
 
@@ -1899,13 +1936,19 @@ static bool ignore_unreachable_insn(struct instruction *insn)
 		if (is_kasan_insn(insn) || is_ubsan_insn(insn))
 			return true;
 
-		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
-			insn = insn->jump_dest;
-			continue;
+		if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+			if (insn->jump_dest &&
+			    insn->jump_dest->func == insn->func) {
+				insn = insn->jump_dest;
+				continue;
+			}
+
+			break;
 		}
 
 		if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
 			break;
+
 		insn = list_next_entry(insn, list);
 	}
 
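
The objtool changes above add a first_jump_src back-pointer so the "case 3" reverse scan can hop over the 'in between' code newer GCCs emit between the mov and the jmpq. A simplified model of that walk; the types are reduced to the two fields that matter and are not objtool's real structures:

#include <stddef.h>

struct insn {
	struct insn *prev;            /* linear list order */
	struct insn *first_jump_src;  /* back-pointer set on the jump target */
	int is_dynamic_jump;
};

/* Step backwards, preferring the recorded jump source over the linear
 * predecessor, mirroring insn->first_jump_src ?: list_prev_entry(...). */
static struct insn *find_dynamic_jump_backwards(struct insn *from)
{
	struct insn *i;

	for (i = from->prev; i;
	     i = i->first_jump_src ? i->first_jump_src : i->prev) {
		if (i->is_dynamic_jump)
			return i;
	}
	return NULL;
}

int main(void)
{
	struct insn a = { 0 }, b = { 0 }, c = { 0 };

	a.is_dynamic_jump = 1;
	b.prev = &a;
	c.prev = &b;
	return find_dynamic_jump_backwards(&c) == &a ? 0 : 1;
}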
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index dbadb304a410..23a1d065cae1 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -47,6 +47,7 @@ struct instruction {
 	bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
 	struct symbol *call_dest;
 	struct instruction *jump_dest;
+	struct instruction *first_jump_src;
 	struct list_head alts;
 	struct symbol *func;
 	struct stack_op stack_op;
diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index f0796a47dfa3..90bb4aabe4f8 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -30,6 +30,10 @@ OPTIONS for 'convert'
 -i::
 	Specify input perf data file path.
 
+-f::
+--force::
+	Don't complain, do it.
+
 -v::
 --verbose::
 	Be more verbose (show counter open errors, etc).
diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile
index 48228de415d0..dfa6e3103437 100644
--- a/tools/perf/arch/s390/Makefile
+++ b/tools/perf/arch/s390/Makefile
@@ -10,15 +10,19 @@ PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
 
 out    := $(OUTPUT)arch/s390/include/generated/asm
 header := $(out)/syscalls_64.c
-sysdef := $(srctree)/tools/arch/s390/include/uapi/asm/unistd.h
-sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls/
+syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl
+sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls
+sysdef := $(sysprf)/syscall.tbl
 systbl := $(sysprf)/mksyscalltbl
 
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header): $(sysdef) $(systbl)
-	$(Q)$(SHELL) '$(systbl)' '$(CC)' $(sysdef) > $@
+	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+	(diff -B $(sysdef) $(syskrn) >/dev/null) \
+	|| echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true
+	$(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
 
 clean::
 	$(call QUIET_CLEAN, s390) $(RM) $(header)
diff --git a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
index 7fa0d0abd419..72ecbb676370 100755
--- a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
@@ -3,25 +3,23 @@
 #
 # Generate system call table for perf
 #
-#
-# Copyright IBM Corp. 2017
+# Copyright IBM Corp. 2017, 2018
 # Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 #
 
-gcc=$1
-input=$2
+SYSCALL_TBL=$1
 
-if ! test -r $input; then
+if ! test -r $SYSCALL_TBL; then
 	echo "Could not read input file" >&2
 	exit 1
 fi
 
 create_table()
 {
-	local max_nr
+	local max_nr nr abi sc discard
 
 	echo 'static const char *syscalltbl_s390_64[] = {'
-	while read sc nr; do
+	while read nr abi sc discard; do
 		printf '\t[%d] = "%s",\n' $nr $sc
 		max_nr=$nr
 	done
@@ -29,8 +27,6 @@ create_table()
 	echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr"
 }
 
-
-$gcc -m64 -E -dM -x c $input \
-	|sed -ne 's/^#define __NR_//p' \
-	|sort -t' ' -k2 -nu \
+grep -E "^[[:digit:]]+[[:space:]]+(common|64)" $SYSCALL_TBL \
+	|sort -k1 -n \
 	|create_table
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
new file mode 100644
index 000000000000..b38d48464368
--- /dev/null
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -0,0 +1,390 @@
1# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2#
3# System call table for s390
4#
5# Format:
6#
7# <nr> <abi> <syscall> <entry-64bit> <compat-entry>
8#
9# where <abi> can be common, 64, or 32
10
111 common exit sys_exit sys_exit
122 common fork sys_fork sys_fork
133 common read sys_read compat_sys_s390_read
144 common write sys_write compat_sys_s390_write
155 common open sys_open compat_sys_open
166 common close sys_close sys_close
177 common restart_syscall sys_restart_syscall sys_restart_syscall
188 common creat sys_creat compat_sys_creat
199 common link sys_link compat_sys_link
2010 common unlink sys_unlink compat_sys_unlink
2111 common execve sys_execve compat_sys_execve
2212 common chdir sys_chdir compat_sys_chdir
2313 32 time - compat_sys_time
2414 common mknod sys_mknod compat_sys_mknod
2515 common chmod sys_chmod compat_sys_chmod
2616 32 lchown - compat_sys_s390_lchown16
2719 common lseek sys_lseek compat_sys_lseek
2820 common getpid sys_getpid sys_getpid
2921 common mount sys_mount compat_sys_mount
3022 common umount sys_oldumount compat_sys_oldumount
3123 32 setuid - compat_sys_s390_setuid16
3224 32 getuid - compat_sys_s390_getuid16
3325 32 stime - compat_sys_stime
3426 common ptrace sys_ptrace compat_sys_ptrace
3527 common alarm sys_alarm sys_alarm
3629 common pause sys_pause sys_pause
3730 common utime sys_utime compat_sys_utime
3833 common access sys_access compat_sys_access
3934 common nice sys_nice sys_nice
4036 common sync sys_sync sys_sync
4137 common kill sys_kill sys_kill
4238 common rename sys_rename compat_sys_rename
4339 common mkdir sys_mkdir compat_sys_mkdir
4440 common rmdir sys_rmdir compat_sys_rmdir
4541 common dup sys_dup sys_dup
4642 common pipe sys_pipe compat_sys_pipe
4743 common times sys_times compat_sys_times
4845 common brk sys_brk compat_sys_brk
4946 32 setgid - compat_sys_s390_setgid16
5047 32 getgid - compat_sys_s390_getgid16
5148 common signal sys_signal compat_sys_signal
5249 32 geteuid - compat_sys_s390_geteuid16
5350 32 getegid - compat_sys_s390_getegid16
5451 common acct sys_acct compat_sys_acct
5552 common umount2 sys_umount compat_sys_umount
5654 common ioctl sys_ioctl compat_sys_ioctl
5755 common fcntl sys_fcntl compat_sys_fcntl
5857 common setpgid sys_setpgid sys_setpgid
5960 common umask sys_umask sys_umask
6061 common chroot sys_chroot compat_sys_chroot
6162 common ustat sys_ustat compat_sys_ustat
6263 common dup2 sys_dup2 sys_dup2
6364 common getppid sys_getppid sys_getppid
6465 common getpgrp sys_getpgrp sys_getpgrp
6566 common setsid sys_setsid sys_setsid
6667 common sigaction sys_sigaction compat_sys_sigaction
6770 32 setreuid - compat_sys_s390_setreuid16
6871 32 setregid - compat_sys_s390_setregid16
6972 common sigsuspend sys_sigsuspend compat_sys_sigsuspend
7073 common sigpending sys_sigpending compat_sys_sigpending
7174 common sethostname sys_sethostname compat_sys_sethostname
7275 common setrlimit sys_setrlimit compat_sys_setrlimit
7376 32 getrlimit - compat_sys_old_getrlimit
7477 common getrusage sys_getrusage compat_sys_getrusage
7578 common gettimeofday sys_gettimeofday compat_sys_gettimeofday
7679 common settimeofday sys_settimeofday compat_sys_settimeofday
7780 32 getgroups - compat_sys_s390_getgroups16
7881 32 setgroups - compat_sys_s390_setgroups16
7983 common symlink sys_symlink compat_sys_symlink
8085 common readlink sys_readlink compat_sys_readlink
8186 common uselib sys_uselib compat_sys_uselib
8287 common swapon sys_swapon compat_sys_swapon
8388 common reboot sys_reboot compat_sys_reboot
8489 common readdir - compat_sys_old_readdir
8590 common mmap sys_old_mmap compat_sys_s390_old_mmap
8691 common munmap sys_munmap compat_sys_munmap
8792 common truncate sys_truncate compat_sys_truncate
8893 common ftruncate sys_ftruncate compat_sys_ftruncate
8994 common fchmod sys_fchmod sys_fchmod
9095 32 fchown - compat_sys_s390_fchown16
9196 common getpriority sys_getpriority sys_getpriority
9297 common setpriority sys_setpriority sys_setpriority
9399 common statfs sys_statfs compat_sys_statfs
94100 common fstatfs sys_fstatfs compat_sys_fstatfs
95101 32 ioperm - -
96102 common socketcall sys_socketcall compat_sys_socketcall
97103 common syslog sys_syslog compat_sys_syslog
98104 common setitimer sys_setitimer compat_sys_setitimer
99105 common getitimer sys_getitimer compat_sys_getitimer
100106 common stat sys_newstat compat_sys_newstat
101107 common lstat sys_newlstat compat_sys_newlstat
102108 common fstat sys_newfstat compat_sys_newfstat
103110 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
104111 common vhangup sys_vhangup sys_vhangup
105112 common idle - -
106114 common wait4 sys_wait4 compat_sys_wait4
107115 common swapoff sys_swapoff compat_sys_swapoff
108116 common sysinfo sys_sysinfo compat_sys_sysinfo
109117 common ipc sys_s390_ipc compat_sys_s390_ipc
110118 common fsync sys_fsync sys_fsync
111119 common sigreturn sys_sigreturn compat_sys_sigreturn
112120 common clone sys_clone compat_sys_clone
113121 common setdomainname sys_setdomainname compat_sys_setdomainname
114122 common uname sys_newuname compat_sys_newuname
115124 common adjtimex sys_adjtimex compat_sys_adjtimex
116125 common mprotect sys_mprotect compat_sys_mprotect
117126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask
118127 common create_module - -
119128 common init_module sys_init_module compat_sys_init_module
120129 common delete_module sys_delete_module compat_sys_delete_module
121130 common get_kernel_syms - -
122131 common quotactl sys_quotactl compat_sys_quotactl
123132 common getpgid sys_getpgid sys_getpgid
124133 common fchdir sys_fchdir sys_fchdir
125134 common bdflush sys_bdflush compat_sys_bdflush
126135 common sysfs sys_sysfs compat_sys_sysfs
127136 common personality sys_s390_personality sys_s390_personality
128137 common afs_syscall - -
129138 32 setfsuid - compat_sys_s390_setfsuid16
130139 32 setfsgid - compat_sys_s390_setfsgid16
131140 32 _llseek - compat_sys_llseek
132141 common getdents sys_getdents compat_sys_getdents
133142 32 _newselect - compat_sys_select
134142 64 select sys_select -
135143 common flock sys_flock sys_flock
136144 common msync sys_msync compat_sys_msync
137145 common readv sys_readv compat_sys_readv
138146 common writev sys_writev compat_sys_writev
139147 common getsid sys_getsid sys_getsid
140148 common fdatasync sys_fdatasync sys_fdatasync
141149 common _sysctl sys_sysctl compat_sys_sysctl
142150 common mlock sys_mlock compat_sys_mlock
143151 common munlock sys_munlock compat_sys_munlock
144152 common mlockall sys_mlockall sys_mlockall
145153 common munlockall sys_munlockall sys_munlockall
146154 common sched_setparam sys_sched_setparam compat_sys_sched_setparam
147155 common sched_getparam sys_sched_getparam compat_sys_sched_getparam
148156 common sched_setscheduler sys_sched_setscheduler compat_sys_sched_setscheduler
149157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler
150158 common sched_yield sys_sched_yield sys_sched_yield
151159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max
152160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min
153161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
154162 common nanosleep sys_nanosleep compat_sys_nanosleep
155163 common mremap sys_mremap compat_sys_mremap
156164 32 setresuid - compat_sys_s390_setresuid16
157165 32 getresuid - compat_sys_s390_getresuid16
158167 common query_module - -
159168 common poll sys_poll compat_sys_poll
160169 common nfsservctl - -
161170 32 setresgid - compat_sys_s390_setresgid16
162171 32 getresgid - compat_sys_s390_getresgid16
163172 common prctl sys_prctl compat_sys_prctl
164173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn
165174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
166175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
167176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
168177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
169178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
170179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
171180 common pread64 sys_pread64 compat_sys_s390_pread64
172181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64
173182 32 chown - compat_sys_s390_chown16
174183 common getcwd sys_getcwd compat_sys_getcwd
175184 common capget sys_capget compat_sys_capget
176185 common capset sys_capset compat_sys_capset
177186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack
178187 common sendfile sys_sendfile64 compat_sys_sendfile
179188 common getpmsg - -
180189 common putpmsg - -
181190 common vfork sys_vfork sys_vfork
182191 32 ugetrlimit - compat_sys_getrlimit
183191 64 getrlimit sys_getrlimit -
184192 32 mmap2 - compat_sys_s390_mmap2
185193 32 truncate64 - compat_sys_s390_truncate64
186194 32 ftruncate64 - compat_sys_s390_ftruncate64
187195 32 stat64 - compat_sys_s390_stat64
188196 32 lstat64 - compat_sys_s390_lstat64
189197 32 fstat64 - compat_sys_s390_fstat64
190198 32 lchown32 - compat_sys_lchown
191198 64 lchown sys_lchown -
192199 32 getuid32 - sys_getuid
193199 64 getuid sys_getuid -
194200 32 getgid32 - sys_getgid
195200 64 getgid sys_getgid -
196201 32 geteuid32 - sys_geteuid
197201 64 geteuid sys_geteuid -
198202 32 getegid32 - sys_getegid
199202 64 getegid sys_getegid -
200203 32 setreuid32 - sys_setreuid
201203 64 setreuid sys_setreuid -
202204 32 setregid32 - sys_setregid
203204 64 setregid sys_setregid -
204205 32 getgroups32 - compat_sys_getgroups
205205 64 getgroups sys_getgroups -
206206 32 setgroups32 - compat_sys_setgroups
207206 64 setgroups sys_setgroups -
208207 32 fchown32 - sys_fchown
209207 64 fchown sys_fchown -
210208 32 setresuid32 - sys_setresuid
211208 64 setresuid sys_setresuid -
212209 32 getresuid32 - compat_sys_getresuid
213209 64 getresuid sys_getresuid -
214210 32 setresgid32 - sys_setresgid
215210 64 setresgid sys_setresgid -
216211 32 getresgid32 - compat_sys_getresgid
217211 64 getresgid sys_getresgid -
218212 32 chown32 - compat_sys_chown
219212 64 chown sys_chown -
220213 32 setuid32 - sys_setuid
221213 64 setuid sys_setuid -
222214 32 setgid32 - sys_setgid
223214 64 setgid sys_setgid -
224215 32 setfsuid32 - sys_setfsuid
225215 64 setfsuid sys_setfsuid -
226216 32 setfsgid32 - sys_setfsgid
227216 64 setfsgid sys_setfsgid -
228217 common pivot_root sys_pivot_root compat_sys_pivot_root
229218 common mincore sys_mincore compat_sys_mincore
230219 common madvise sys_madvise compat_sys_madvise
231220 common getdents64 sys_getdents64 compat_sys_getdents64
232221 32 fcntl64 - compat_sys_fcntl64
233222 common readahead sys_readahead compat_sys_s390_readahead
234223 32 sendfile64 - compat_sys_sendfile64
235224 common setxattr sys_setxattr compat_sys_setxattr
236225 common lsetxattr sys_lsetxattr compat_sys_lsetxattr
237226 common fsetxattr sys_fsetxattr compat_sys_fsetxattr
238227 common getxattr sys_getxattr compat_sys_getxattr
239228 common lgetxattr sys_lgetxattr compat_sys_lgetxattr
240229 common fgetxattr sys_fgetxattr compat_sys_fgetxattr
241230 common listxattr sys_listxattr compat_sys_listxattr
242231 common llistxattr sys_llistxattr compat_sys_llistxattr
243232 common flistxattr sys_flistxattr compat_sys_flistxattr
244233 common removexattr sys_removexattr compat_sys_removexattr
245234 common lremovexattr sys_lremovexattr compat_sys_lremovexattr
246235 common fremovexattr sys_fremovexattr compat_sys_fremovexattr
247236 common gettid sys_gettid sys_gettid
248237 common tkill sys_tkill sys_tkill
249238 common futex sys_futex compat_sys_futex
250239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
251240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
252241 common tgkill sys_tgkill sys_tgkill
253243 common io_setup sys_io_setup compat_sys_io_setup
254244 common io_destroy sys_io_destroy compat_sys_io_destroy
255245 common io_getevents sys_io_getevents compat_sys_io_getevents
256246 common io_submit sys_io_submit compat_sys_io_submit
257247 common io_cancel sys_io_cancel compat_sys_io_cancel
258248 common exit_group sys_exit_group sys_exit_group
259249 common epoll_create sys_epoll_create sys_epoll_create
260250 common epoll_ctl sys_epoll_ctl compat_sys_epoll_ctl
261251 common epoll_wait sys_epoll_wait compat_sys_epoll_wait
262252 common set_tid_address sys_set_tid_address compat_sys_set_tid_address
263253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64
264254 common timer_create sys_timer_create compat_sys_timer_create
265255 common timer_settime sys_timer_settime compat_sys_timer_settime
266256 common timer_gettime sys_timer_gettime compat_sys_timer_gettime
267257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun
268258 common timer_delete sys_timer_delete sys_timer_delete
269259 common clock_settime sys_clock_settime compat_sys_clock_settime
270260 common clock_gettime sys_clock_gettime compat_sys_clock_gettime
271261 common clock_getres sys_clock_getres compat_sys_clock_getres
272262 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
273264 32 fadvise64_64 - compat_sys_s390_fadvise64_64
274265 common statfs64 sys_statfs64 compat_sys_statfs64
275266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
276267 common remap_file_pages sys_remap_file_pages compat_sys_remap_file_pages
277268 common mbind sys_mbind compat_sys_mbind
278269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
279270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
280271 common mq_open sys_mq_open compat_sys_mq_open
281272 common mq_unlink sys_mq_unlink compat_sys_mq_unlink
282273 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
283274 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
284275 common mq_notify sys_mq_notify compat_sys_mq_notify
285276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
286277 common kexec_load sys_kexec_load compat_sys_kexec_load
287278 common add_key sys_add_key compat_sys_add_key
288279 common request_key sys_request_key compat_sys_request_key
289280 common keyctl sys_keyctl compat_sys_keyctl
290281 common waitid sys_waitid compat_sys_waitid
291282 common ioprio_set sys_ioprio_set sys_ioprio_set
292283 common ioprio_get sys_ioprio_get sys_ioprio_get
293284 common inotify_init sys_inotify_init sys_inotify_init
294285 common inotify_add_watch sys_inotify_add_watch compat_sys_inotify_add_watch
295286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch
296287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages
297288 common openat sys_openat compat_sys_openat
298289 common mkdirat sys_mkdirat compat_sys_mkdirat
299290 common mknodat sys_mknodat compat_sys_mknodat
300291 common fchownat sys_fchownat compat_sys_fchownat
301292 common futimesat sys_futimesat compat_sys_futimesat
302293 32 fstatat64 - compat_sys_s390_fstatat64
303293 64 newfstatat sys_newfstatat -
304294 common unlinkat sys_unlinkat compat_sys_unlinkat
305295 common renameat sys_renameat compat_sys_renameat
306296 common linkat sys_linkat compat_sys_linkat
307297 common symlinkat sys_symlinkat compat_sys_symlinkat
308298 common readlinkat sys_readlinkat compat_sys_readlinkat
309299 common fchmodat sys_fchmodat compat_sys_fchmodat
310300 common faccessat sys_faccessat compat_sys_faccessat
311301 common pselect6 sys_pselect6 compat_sys_pselect6
312302 common ppoll sys_ppoll compat_sys_ppoll
313303 common unshare sys_unshare compat_sys_unshare
314304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list
315305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list
316306 common splice sys_splice compat_sys_splice
317307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range
318308 common tee sys_tee compat_sys_tee
319309 common vmsplice sys_vmsplice compat_sys_vmsplice
320310 common move_pages sys_move_pages compat_sys_move_pages
321311 common getcpu sys_getcpu compat_sys_getcpu
322312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
323313 common utimes sys_utimes compat_sys_utimes
324314 common fallocate sys_fallocate compat_sys_s390_fallocate
325315 common utimensat sys_utimensat compat_sys_utimensat
326316 common signalfd sys_signalfd compat_sys_signalfd
327317 common timerfd - -
328318 common eventfd sys_eventfd sys_eventfd
329319 common timerfd_create sys_timerfd_create sys_timerfd_create
330320 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
331321 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
332322 common signalfd4 sys_signalfd4 compat_sys_signalfd4
333323 common eventfd2 sys_eventfd2 sys_eventfd2
334324 common inotify_init1 sys_inotify_init1 sys_inotify_init1
335325 common pipe2 sys_pipe2 compat_sys_pipe2
336326 common dup3 sys_dup3 sys_dup3
337327 common epoll_create1 sys_epoll_create1 sys_epoll_create1
338328 common preadv sys_preadv compat_sys_preadv
339329 common pwritev sys_pwritev compat_sys_pwritev
340330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
341331 common perf_event_open sys_perf_event_open compat_sys_perf_event_open
342332 common fanotify_init sys_fanotify_init sys_fanotify_init
343333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
344334 common prlimit64 sys_prlimit64 compat_sys_prlimit64
345335 common name_to_handle_at sys_name_to_handle_at compat_sys_name_to_handle_at
346336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
347337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
348338 common syncfs sys_syncfs sys_syncfs
349339 common setns sys_setns sys_setns
350340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
351341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
352342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr
353343 common kcmp sys_kcmp compat_sys_kcmp
354344 common finit_module sys_finit_module compat_sys_finit_module
355345 common sched_setattr sys_sched_setattr compat_sys_sched_setattr
356346 common sched_getattr sys_sched_getattr compat_sys_sched_getattr
357347 common renameat2 sys_renameat2 compat_sys_renameat2
358348 common seccomp sys_seccomp compat_sys_seccomp
359349 common getrandom sys_getrandom compat_sys_getrandom
360350 common memfd_create sys_memfd_create compat_sys_memfd_create
361351 common bpf sys_bpf compat_sys_bpf
362352 common s390_pci_mmio_write sys_s390_pci_mmio_write compat_sys_s390_pci_mmio_write
363353 common s390_pci_mmio_read sys_s390_pci_mmio_read compat_sys_s390_pci_mmio_read
364354 common execveat sys_execveat compat_sys_execveat
365355 common userfaultfd sys_userfaultfd sys_userfaultfd
366356 common membarrier sys_membarrier sys_membarrier
367357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg
368358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
369359 common socket sys_socket sys_socket
370360 common socketpair sys_socketpair compat_sys_socketpair
371361 common bind sys_bind compat_sys_bind
372362 common connect sys_connect compat_sys_connect
373363 common listen sys_listen sys_listen
374364 common accept4 sys_accept4 compat_sys_accept4
375365 common getsockopt sys_getsockopt compat_sys_getsockopt
376366 common setsockopt sys_setsockopt compat_sys_setsockopt
377367 common getsockname sys_getsockname compat_sys_getsockname
378368 common getpeername sys_getpeername compat_sys_getpeername
379369 common sendto sys_sendto compat_sys_sendto
380370 common sendmsg sys_sendmsg compat_sys_sendmsg
381371 common recvfrom sys_recvfrom compat_sys_recvfrom
382372 common recvmsg sys_recvmsg compat_sys_recvmsg
383373 common shutdown sys_shutdown sys_shutdown
384374 common mlock2 sys_mlock2 compat_sys_mlock2
385375 common copy_file_range sys_copy_file_range compat_sys_copy_file_range
386376 common preadv2 sys_preadv2 compat_sys_preadv2
387377 common pwritev2 sys_pwritev2 compat_sys_pwritev2
388378 common s390_guarded_storage sys_s390_guarded_storage compat_sys_s390_guarded_storage
389379 common statx sys_statx compat_sys_statx
390380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi
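(For reference: each row of the syscall table above reads <number> <abi> <name> <entry point> <compat entry point>, where abi is one of common/64/32 and "-" marks a column with no wired-up entry, e.g. timerfd above, whose number was reserved but never implemented.)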
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c0815a37fdb5..539c3d460158 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2245,7 +2245,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
2245 c2c_browser__update_nr_entries(browser); 2245 c2c_browser__update_nr_entries(browser);
2246 2246
2247 while (1) { 2247 while (1) {
2248 key = hist_browser__run(browser, "? - help"); 2248 key = hist_browser__run(browser, "? - help", true);
2249 2249
2250 switch (key) { 2250 switch (key) {
2251 case 's': 2251 case 's':
@@ -2314,7 +2314,7 @@ static int perf_c2c__hists_browse(struct hists *hists)
2314 c2c_browser__update_nr_entries(browser); 2314 c2c_browser__update_nr_entries(browser);
2315 2315
2316 while (1) { 2316 while (1) {
2317 key = hist_browser__run(browser, "? - help"); 2317 key = hist_browser__run(browser, "? - help", true);
2318 2318
2319 switch (key) { 2319 switch (key) {
2320 case 'q': 2320 case 'q':
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 42a52dcc41cd..4ad5dc649716 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -530,7 +530,8 @@ static int report__browse_hists(struct report *rep)
530 case 1: 530 case 1:
531 ret = perf_evlist__tui_browse_hists(evlist, help, NULL, 531 ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
532 rep->min_percent, 532 rep->min_percent,
533 &session->header.env); 533 &session->header.env,
534 true);
534 /* 535 /*
535 * Usually "ret" is the last pressed key, and we only 536 * Usually "ret" is the last pressed key, and we only
536 * care if the key notifies us to switch data file. 537 * care if the key notifies us to switch data file.
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index c6ccda52117d..b7c823ba8374 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -283,8 +283,9 @@ static void perf_top__print_sym_table(struct perf_top *top)
283 283
284 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 284 printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
285 285
286 if (hists->stats.nr_lost_warned != 286 if (!top->record_opts.overwrite &&
287 hists->stats.nr_events[PERF_RECORD_LOST]) { 287 (hists->stats.nr_lost_warned !=
288 hists->stats.nr_events[PERF_RECORD_LOST])) {
288 hists->stats.nr_lost_warned = 289 hists->stats.nr_lost_warned =
289 hists->stats.nr_events[PERF_RECORD_LOST]; 290 hists->stats.nr_events[PERF_RECORD_LOST];
290 color_fprintf(stdout, PERF_COLOR_RED, 291 color_fprintf(stdout, PERF_COLOR_RED,
@@ -611,7 +612,8 @@ static void *display_thread_tui(void *arg)
611 612
612 perf_evlist__tui_browse_hists(top->evlist, help, &hbt, 613 perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
613 top->min_percent, 614 top->min_percent,
614 &top->session->header.env); 615 &top->session->header.env,
616 !top->record_opts.overwrite);
615 617
616 done = 1; 618 done = 1;
617 return NULL; 619 return NULL;
@@ -807,15 +809,23 @@ static void perf_event__process_sample(struct perf_tool *tool,
807 809
808static void perf_top__mmap_read_idx(struct perf_top *top, int idx) 810static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
809{ 811{
812 struct record_opts *opts = &top->record_opts;
813 struct perf_evlist *evlist = top->evlist;
810 struct perf_sample sample; 814 struct perf_sample sample;
811 struct perf_evsel *evsel; 815 struct perf_evsel *evsel;
816 struct perf_mmap *md;
812 struct perf_session *session = top->session; 817 struct perf_session *session = top->session;
813 union perf_event *event; 818 union perf_event *event;
814 struct machine *machine; 819 struct machine *machine;
820 u64 end, start;
815 int ret; 821 int ret;
816 822
817 while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) { 823 md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
818 ret = perf_evlist__parse_sample(top->evlist, event, &sample); 824 if (perf_mmap__read_init(md, opts->overwrite, &start, &end) < 0)
825 return;
826
827 while ((event = perf_mmap__read_event(md, opts->overwrite, &start, end)) != NULL) {
828 ret = perf_evlist__parse_sample(evlist, event, &sample);
819 if (ret) { 829 if (ret) {
820 pr_err("Can't parse sample, err = %d\n", ret); 830 pr_err("Can't parse sample, err = %d\n", ret);
821 goto next_event; 831 goto next_event;
@@ -869,16 +879,120 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
869 } else 879 } else
870 ++session->evlist->stats.nr_unknown_events; 880 ++session->evlist->stats.nr_unknown_events;
871next_event: 881next_event:
872 perf_evlist__mmap_consume(top->evlist, idx); 882 perf_mmap__consume(md, opts->overwrite);
873 } 883 }
884
885 perf_mmap__read_done(md);
874} 886}
875 887
876static void perf_top__mmap_read(struct perf_top *top) 888static void perf_top__mmap_read(struct perf_top *top)
877{ 889{
890 bool overwrite = top->record_opts.overwrite;
891 struct perf_evlist *evlist = top->evlist;
892 unsigned long long start, end;
878 int i; 893 int i;
879 894
895 start = rdclock();
896 if (overwrite)
897 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
898
880 for (i = 0; i < top->evlist->nr_mmaps; i++) 899 for (i = 0; i < top->evlist->nr_mmaps; i++)
881 perf_top__mmap_read_idx(top, i); 900 perf_top__mmap_read_idx(top, i);
901
902 if (overwrite) {
903 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
904 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
905 }
906 end = rdclock();
907
908 if ((end - start) > (unsigned long long)top->delay_secs * NSEC_PER_SEC)
909 ui__warning("Too slow to read ring buffer.\n"
910 "Please try increasing the period (-c) or\n"
911 "decreasing the freq (-F) or\n"
912 "limiting the number of CPUs (-C)\n");
913}
914
 915/*
 916 * Check the per-event overwrite term.
 917 * perf top requires a consistent overwrite term across all events.
 918 * - No event has a per-event term
 919 * E.g. "cpu/cpu-cycles/,cpu/instructions/"
 920 * Nothing changes; return 0.
 921 * - All events have the same per-event term
 922 * E.g. "cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/"
 923 * Use the per-event setting to replace opts->overwrite if
 924 * they differ, then return 0.
 925 * - Events have different per-event terms
 926 * E.g. "cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/"
 927 * Return -1.
 928 * - Some events set a per-event term, but others do not.
 929 * E.g. "cpu/cpu-cycles/,cpu/instructions,no-overwrite/"
 930 * Return -1.
 931 */
932static int perf_top__overwrite_check(struct perf_top *top)
933{
934 struct record_opts *opts = &top->record_opts;
935 struct perf_evlist *evlist = top->evlist;
936 struct perf_evsel_config_term *term;
937 struct list_head *config_terms;
938 struct perf_evsel *evsel;
939 int set, overwrite = -1;
940
941 evlist__for_each_entry(evlist, evsel) {
942 set = -1;
943 config_terms = &evsel->config_terms;
944 list_for_each_entry(term, config_terms, list) {
945 if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE)
946 set = term->val.overwrite ? 1 : 0;
947 }
948
949 /* no term for current and previous event (likely) */
950 if ((overwrite < 0) && (set < 0))
951 continue;
952
953 /* has term for both current and previous event, compare */
954 if ((overwrite >= 0) && (set >= 0) && (overwrite != set))
955 return -1;
956
957 /* no term for current event but has term for previous one */
958 if ((overwrite >= 0) && (set < 0))
959 return -1;
960
961 /* has term for current event */
962 if ((overwrite < 0) && (set >= 0)) {
963 /* if it's first event, set overwrite */
964 if (evsel == perf_evlist__first(evlist))
965 overwrite = set;
966 else
967 return -1;
968 }
969 }
970
971 if ((overwrite >= 0) && (opts->overwrite != overwrite))
972 opts->overwrite = overwrite;
973
974 return 0;
975}
976
977static int perf_top_overwrite_fallback(struct perf_top *top,
978 struct perf_evsel *evsel)
979{
980 struct record_opts *opts = &top->record_opts;
981 struct perf_evlist *evlist = top->evlist;
982 struct perf_evsel *counter;
983
984 if (!opts->overwrite)
985 return 0;
986
987 /* only fall back when first event fails */
988 if (evsel != perf_evlist__first(evlist))
989 return 0;
990
991 evlist__for_each_entry(evlist, counter)
992 counter->attr.write_backward = false;
993 opts->overwrite = false;
994 ui__warning("fall back to non-overwrite mode\n");
995 return 1;
882} 996}
883 997
884static int perf_top__start_counters(struct perf_top *top) 998static int perf_top__start_counters(struct perf_top *top)
@@ -888,12 +1002,33 @@ static int perf_top__start_counters(struct perf_top *top)
888 struct perf_evlist *evlist = top->evlist; 1002 struct perf_evlist *evlist = top->evlist;
889 struct record_opts *opts = &top->record_opts; 1003 struct record_opts *opts = &top->record_opts;
890 1004
1005 if (perf_top__overwrite_check(top)) {
 1006 ui__error("perf top only supports a consistent per-event "
1007 "overwrite setting for all events\n");
1008 goto out_err;
1009 }
1010
891 perf_evlist__config(evlist, opts, &callchain_param); 1011 perf_evlist__config(evlist, opts, &callchain_param);
892 1012
893 evlist__for_each_entry(evlist, counter) { 1013 evlist__for_each_entry(evlist, counter) {
894try_again: 1014try_again:
895 if (perf_evsel__open(counter, top->evlist->cpus, 1015 if (perf_evsel__open(counter, top->evlist->cpus,
896 top->evlist->threads) < 0) { 1016 top->evlist->threads) < 0) {
1017
1018 /*
 1019 * Specially handle the overwrite fallback.
 1020 * perf top is the only tool which has
 1021 * overwrite mode by default and supports
 1022 * both overwrite and non-overwrite modes,
 1023 * requiring a consistent mode for all events.
 1024 *
 1025 * This may move to generic code once more tools
 1026 * have a similar attribute.
1027 */
1028 if (perf_missing_features.write_backward &&
1029 perf_top_overwrite_fallback(top, counter))
1030 goto try_again;
1031
897 if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { 1032 if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
898 if (verbose > 0) 1033 if (verbose > 0)
899 ui__warning("%s\n", msg); 1034 ui__warning("%s\n", msg);
@@ -1033,7 +1168,7 @@ static int __cmd_top(struct perf_top *top)
1033 1168
1034 perf_top__mmap_read(top); 1169 perf_top__mmap_read(top);
1035 1170
1036 if (hits == top->samples) 1171 if (opts->overwrite || (hits == top->samples))
1037 ret = perf_evlist__poll(top->evlist, 100); 1172 ret = perf_evlist__poll(top->evlist, 100);
1038 1173
1039 if (resize) { 1174 if (resize) {
@@ -1127,6 +1262,7 @@ int cmd_top(int argc, const char **argv)
1127 .uses_mmap = true, 1262 .uses_mmap = true,
1128 }, 1263 },
1129 .proc_map_timeout = 500, 1264 .proc_map_timeout = 500,
1265 .overwrite = 1,
1130 }, 1266 },
1131 .max_stack = sysctl_perf_event_max_stack, 1267 .max_stack = sysctl_perf_event_max_stack,
1132 .sym_pcnt_filter = 5, 1268 .sym_pcnt_filter = 5,
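A note on the overwrite-term check added above: the accepted and rejected combinations are easiest to see from the command line. A hypothetical sketch (exact PMU and event names vary by machine):

	perf top -e cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/	# consistent terms: accepted
	perf top -e cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/	# mixed terms: rejected by perf_top__overwrite_check()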
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 51abdb0a4047..790ec25919a0 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -33,7 +33,6 @@ arch/s390/include/uapi/asm/kvm.h
33arch/s390/include/uapi/asm/kvm_perf.h 33arch/s390/include/uapi/asm/kvm_perf.h
34arch/s390/include/uapi/asm/ptrace.h 34arch/s390/include/uapi/asm/ptrace.h
35arch/s390/include/uapi/asm/sie.h 35arch/s390/include/uapi/asm/sie.h
36arch/s390/include/uapi/asm/unistd.h
37arch/arm/include/uapi/asm/kvm.h 36arch/arm/include/uapi/asm/kvm.h
38arch/arm64/include/uapi/asm/kvm.h 37arch/arm64/include/uapi/asm/kvm.h
39arch/alpha/include/uapi/asm/errno.h 38arch/alpha/include/uapi/asm/errno.h
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json
new file mode 100644
index 000000000000..3b6208763e50
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json
@@ -0,0 +1,27 @@
1[
 2 {
3 "EventCode": "0x7A",
4 "EventName": "BR_INDIRECT_SPEC",
5 "BriefDescription": "Branch speculatively executed - Indirect branch"
6 },
 7 {
8 "EventCode": "0xC9",
9 "EventName": "BR_COND",
10 "BriefDescription": "Conditional branch executed"
11 },
 12 {
13 "EventCode": "0xCA",
14 "EventName": "BR_INDIRECT_MISPRED",
15 "BriefDescription": "Indirect branch mispredicted"
16 },
 17 {
18 "EventCode": "0xCB",
19 "EventName": "BR_INDIRECT_MISPRED_ADDR",
20 "BriefDescription": "Indirect branch mispredicted because of address miscompare"
21 },
 22 {
23 "EventCode": "0xCC",
24 "EventName": "BR_COND_MISPRED",
25 "BriefDescription": "Conditional branch mispredicted"
26 }
27]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json
new file mode 100644
index 000000000000..480d9f7460ab
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json
@@ -0,0 +1,22 @@
1[
 2 {
3 "EventCode": "0x60",
4 "EventName": "BUS_ACCESS_LD",
5 "BriefDescription": "Bus access - Read"
6 },
 7 {
8 "EventCode": "0x61",
9 "EventName": "BUS_ACCESS_ST",
10 "BriefDescription": "Bus access - Write"
11 },
 12 {
13 "EventCode": "0xC0",
14 "EventName": "EXT_MEM_REQ",
15 "BriefDescription": "External memory request"
16 },
 17 {
18 "EventCode": "0xC1",
19 "EventName": "EXT_MEM_REQ_NC",
20 "BriefDescription": "Non-cacheable external memory request"
21 }
22]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json
new file mode 100644
index 000000000000..11baad6344b9
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json
@@ -0,0 +1,27 @@
1[
 2 {
3 "EventCode": "0xC2",
4 "EventName": "PREFETCH_LINEFILL",
5 "BriefDescription": "Linefill because of prefetch"
6 },
 7 {
8 "EventCode": "0xC3",
9 "EventName": "PREFETCH_LINEFILL_DROP",
10 "BriefDescription": "Instruction Cache Throttle occurred"
11 },
 12 {
13 "EventCode": "0xC4",
14 "EventName": "READ_ALLOC_ENTER",
15 "BriefDescription": "Entering read allocate mode"
16 },
 17 {
18 "EventCode": "0xC5",
19 "EventName": "READ_ALLOC",
20 "BriefDescription": "Read allocate mode"
21 },
 22 {
23 "EventCode": "0xC8",
24 "EventName": "EXT_SNOOP",
25 "BriefDescription": "SCU Snooped data from another CPU for this CPU"
26 }
27]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json
new file mode 100644
index 000000000000..480d9f7460ab
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json
@@ -0,0 +1,22 @@
1[
 2 {
3 "EventCode": "0x60",
4 "EventName": "BUS_ACCESS_LD",
5 "BriefDescription": "Bus access - Read"
6 },
 7 {
8 "EventCode": "0x61",
9 "EventName": "BUS_ACCESS_ST",
10 "BriefDescription": "Bus access - Write"
11 },
 12 {
13 "EventCode": "0xC0",
14 "EventName": "EXT_MEM_REQ",
15 "BriefDescription": "External memory request"
16 },
 17 {
18 "EventCode": "0xC1",
19 "EventName": "EXT_MEM_REQ_NC",
20 "BriefDescription": "Non-cacheable external memory request"
21 }
22]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json
new file mode 100644
index 000000000000..73a22402d003
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json
@@ -0,0 +1,32 @@
1[
 2 {
3 "EventCode": "0x86",
4 "EventName": "EXC_IRQ",
5 "BriefDescription": "Exception taken, IRQ"
6 },
 7 {
8 "EventCode": "0x87",
9 "EventName": "EXC_FIQ",
10 "BriefDescription": "Exception taken, FIQ"
11 },
 12 {
13 "EventCode": "0xC6",
14 "EventName": "PRE_DECODE_ERR",
15 "BriefDescription": "Pre-decode error"
16 },
 17 {
18 "EventCode": "0xD0",
19 "EventName": "L1I_CACHE_ERR",
20 "BriefDescription": "L1 Instruction Cache (data or tag) memory error"
21 },
 22 {
23 "EventCode": "0xD1",
24 "EventName": "L1D_CACHE_ERR",
25 "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable"
26 },
 27 {
28 "EventCode": "0xD2",
29 "EventName": "TLB_ERR",
30 "BriefDescription": "TLB memory error"
31 }
32]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json
new file mode 100644
index 000000000000..3149fb90555a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json
@@ -0,0 +1,52 @@
1[
 2 {
3 "EventCode": "0xC7",
4 "EventName": "STALL_SB_FULL",
5 "BriefDescription": "Data Write operation that stalls the pipeline because the store buffer is full"
6 },
 7 {
8 "EventCode": "0xE0",
9 "EventName": "OTHER_IQ_DEP_STALL",
10 "BriefDescription": "Cycles that the DPU IQ is empty and that is not because of a recent micro-TLB miss, instruction cache miss or pre-decode error"
11 },
 12 {
13 "EventCode": "0xE1",
14 "EventName": "IC_DEP_STALL",
15 "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction cache miss being processed"
16 },
 17 {
18 "EventCode": "0xE2",
19 "EventName": "IUTLB_DEP_STALL",
20 "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction micro-TLB miss being processed"
21 },
 22 {
23 "EventCode": "0xE3",
24 "EventName": "DECODE_DEP_STALL",
25 "BriefDescription": "Cycles the DPU IQ is empty and there is a pre-decode error being processed"
26 },
 27 {
28 "EventCode": "0xE4",
29 "EventName": "OTHER_INTERLOCK_STALL",
30 "BriefDescription": "Cycles there is an interlock other than Advanced SIMD/Floating-point instructions or load/store instruction"
31 },
 32 {
33 "EventCode": "0xE5",
34 "EventName": "AGU_DEP_STALL",
35 "BriefDescription": "Cycles there is an interlock for a load/store instruction waiting for data to calculate the address in the AGU"
36 },
 37 {
38 "EventCode": "0xE6",
39 "EventName": "SIMD_DEP_STALL",
40 "BriefDescription": "Cycles there is an interlock for an Advanced SIMD/Floating-point operation."
41 },
 42 {
43 "EventCode": "0xE7",
44 "EventName": "LD_DEP_STALL",
45 "BriefDescription": "Cycles there is a stall in the Wr stage because of a load miss"
46 },
 47 {
48 "EventCode": "0xE8",
49 "EventName": "ST_DEP_STALL",
50 "BriefDescription": "Cycles there is a stall in the Wr stage because of a store"
51 }
52]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index 219d6756134e..e61c9ca6cf9e 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -13,3 +13,4 @@
13# 13#
14#Family-model,Version,Filename,EventType 14#Family-model,Version,Filename,EventType
150x00000000420f5160,v1,cavium,core 150x00000000420f5160,v1,cavium,core
160x00000000410fd03[[:xdigit:]],v1,cortex-a53,core
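The new mapfile row ties the Cortex-A53 JSON files above to the CPU identification register (MIDR); the trailing [[:xdigit:]] makes the match independent of the revision nibble. As a hypothetical example, an A53 reporting MIDR 0x410fd034 (implementer 0x41 = ARM, part 0xd03) is zero-extended to 0x00000000410fd034 and matches the pattern.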
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index 4035d43523c3..e0b1b414d466 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -31,10 +31,12 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
31 int i; 31 int i;
32 32
33 for (i = 0; i < evlist->nr_mmaps; i++) { 33 for (i = 0; i < evlist->nr_mmaps; i++) {
34 struct perf_mmap *map = &evlist->overwrite_mmap[i];
34 union perf_event *event; 35 union perf_event *event;
36 u64 start, end;
35 37
36 perf_mmap__read_catchup(&evlist->overwrite_mmap[i]); 38 perf_mmap__read_init(map, true, &start, &end);
37 while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) { 39 while ((event = perf_mmap__read_event(map, true, &start, end)) != NULL) {
38 const u32 type = event->header.type; 40 const u32 type = event->header.type;
39 41
40 switch (type) { 42 switch (type) {
@@ -49,6 +51,7 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
49 return TEST_FAIL; 51 return TEST_FAIL;
50 } 52 }
51 } 53 }
54 perf_mmap__read_done(map);
52 } 55 }
53 return TEST_OK; 56 return TEST_OK;
54} 57}
diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
index 8b3da21a08f1..c446c894b297 100755
--- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
@@ -22,10 +22,23 @@ trace_libc_inet_pton_backtrace() {
22 expected[4]="rtt min.*" 22 expected[4]="rtt min.*"
23 expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)" 23 expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)"
24 expected[6]=".*inet_pton[[:space:]]\($libc\)$" 24 expected[6]=".*inet_pton[[:space:]]\($libc\)$"
25 expected[7]="getaddrinfo[[:space:]]\($libc\)$" 25 case "$(uname -m)" in
26 expected[8]=".*\(.*/bin/ping.*\)$" 26 s390x)
27 27 eventattr='call-graph=dwarf'
28 perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do 28 expected[7]="gaih_inet[[:space:]]\(inlined\)$"
29 expected[8]="__GI_getaddrinfo[[:space:]]\(inlined\)$"
30 expected[9]="main[[:space:]]\(.*/bin/ping.*\)$"
31 expected[10]="__libc_start_main[[:space:]]\($libc\)$"
32 expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$"
33 ;;
34 *)
35 eventattr='max-stack=3'
36 expected[7]="getaddrinfo[[:space:]]\($libc\)$"
37 expected[8]=".*\(.*/bin/ping.*\)$"
38 ;;
39 esac
40
41 perf trace --no-syscalls -e probe_libc:inet_pton/$eventattr/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do
29 echo $line 42 echo $line
30 echo "$line" | egrep -q "${expected[$idx]}" 43 echo "$line" | egrep -q "${expected[$idx]}"
31 if [ $? -ne 0 ] ; then 44 if [ $? -ne 0 ] ; then
@@ -33,7 +46,7 @@ trace_libc_inet_pton_backtrace() {
33 exit 1 46 exit 1
34 fi 47 fi
35 let idx+=1 48 let idx+=1
36 [ $idx -eq 9 ] && break 49 [ -z "${expected[$idx]}" ] && break
37 done 50 done
38} 51}
39 52
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 68146f4620a5..6495ee55d9c3 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -608,7 +608,8 @@ static int hist_browser__title(struct hist_browser *browser, char *bf, size_t si
608 return browser->title ? browser->title(browser, bf, size) : 0; 608 return browser->title ? browser->title(browser, bf, size) : 0;
609} 609}
610 610
611int hist_browser__run(struct hist_browser *browser, const char *help) 611int hist_browser__run(struct hist_browser *browser, const char *help,
612 bool warn_lost_event)
612{ 613{
613 int key; 614 int key;
614 char title[160]; 615 char title[160];
@@ -638,8 +639,9 @@ int hist_browser__run(struct hist_browser *browser, const char *help)
638 nr_entries = hist_browser__nr_entries(browser); 639 nr_entries = hist_browser__nr_entries(browser);
639 ui_browser__update_nr_entries(&browser->b, nr_entries); 640 ui_browser__update_nr_entries(&browser->b, nr_entries);
640 641
641 if (browser->hists->stats.nr_lost_warned != 642 if (warn_lost_event &&
642 browser->hists->stats.nr_events[PERF_RECORD_LOST]) { 643 (browser->hists->stats.nr_lost_warned !=
644 browser->hists->stats.nr_events[PERF_RECORD_LOST])) {
643 browser->hists->stats.nr_lost_warned = 645 browser->hists->stats.nr_lost_warned =
644 browser->hists->stats.nr_events[PERF_RECORD_LOST]; 646 browser->hists->stats.nr_events[PERF_RECORD_LOST];
645 ui_browser__warn_lost_events(&browser->b); 647 ui_browser__warn_lost_events(&browser->b);
@@ -2763,7 +2765,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
2763 bool left_exits, 2765 bool left_exits,
2764 struct hist_browser_timer *hbt, 2766 struct hist_browser_timer *hbt,
2765 float min_pcnt, 2767 float min_pcnt,
2766 struct perf_env *env) 2768 struct perf_env *env,
2769 bool warn_lost_event)
2767{ 2770{
2768 struct hists *hists = evsel__hists(evsel); 2771 struct hists *hists = evsel__hists(evsel);
2769 struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env); 2772 struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env);
@@ -2844,7 +2847,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
2844 2847
2845 nr_options = 0; 2848 nr_options = 0;
2846 2849
2847 key = hist_browser__run(browser, helpline); 2850 key = hist_browser__run(browser, helpline,
2851 warn_lost_event);
2848 2852
2849 if (browser->he_selection != NULL) { 2853 if (browser->he_selection != NULL) {
2850 thread = hist_browser__selected_thread(browser); 2854 thread = hist_browser__selected_thread(browser);
@@ -3184,7 +3188,8 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
3184 3188
3185static int perf_evsel_menu__run(struct perf_evsel_menu *menu, 3189static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
3186 int nr_events, const char *help, 3190 int nr_events, const char *help,
3187 struct hist_browser_timer *hbt) 3191 struct hist_browser_timer *hbt,
3192 bool warn_lost_event)
3188{ 3193{
3189 struct perf_evlist *evlist = menu->b.priv; 3194 struct perf_evlist *evlist = menu->b.priv;
3190 struct perf_evsel *pos; 3195 struct perf_evsel *pos;
@@ -3203,7 +3208,9 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
3203 case K_TIMER: 3208 case K_TIMER:
3204 hbt->timer(hbt->arg); 3209 hbt->timer(hbt->arg);
3205 3210
3206 if (!menu->lost_events_warned && menu->lost_events) { 3211 if (!menu->lost_events_warned &&
3212 menu->lost_events &&
3213 warn_lost_event) {
3207 ui_browser__warn_lost_events(&menu->b); 3214 ui_browser__warn_lost_events(&menu->b);
3208 menu->lost_events_warned = true; 3215 menu->lost_events_warned = true;
3209 } 3216 }
@@ -3224,7 +3231,8 @@ browse_hists:
3224 key = perf_evsel__hists_browse(pos, nr_events, help, 3231 key = perf_evsel__hists_browse(pos, nr_events, help,
3225 true, hbt, 3232 true, hbt,
3226 menu->min_pcnt, 3233 menu->min_pcnt,
3227 menu->env); 3234 menu->env,
3235 warn_lost_event);
3228 ui_browser__show_title(&menu->b, title); 3236 ui_browser__show_title(&menu->b, title);
3229 switch (key) { 3237 switch (key) {
3230 case K_TAB: 3238 case K_TAB:
@@ -3282,7 +3290,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
3282 int nr_entries, const char *help, 3290 int nr_entries, const char *help,
3283 struct hist_browser_timer *hbt, 3291 struct hist_browser_timer *hbt,
3284 float min_pcnt, 3292 float min_pcnt,
3285 struct perf_env *env) 3293 struct perf_env *env,
3294 bool warn_lost_event)
3286{ 3295{
3287 struct perf_evsel *pos; 3296 struct perf_evsel *pos;
3288 struct perf_evsel_menu menu = { 3297 struct perf_evsel_menu menu = {
@@ -3309,13 +3318,15 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
3309 menu.b.width = line_len; 3318 menu.b.width = line_len;
3310 } 3319 }
3311 3320
3312 return perf_evsel_menu__run(&menu, nr_entries, help, hbt); 3321 return perf_evsel_menu__run(&menu, nr_entries, help,
3322 hbt, warn_lost_event);
3313} 3323}
3314 3324
3315int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, 3325int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
3316 struct hist_browser_timer *hbt, 3326 struct hist_browser_timer *hbt,
3317 float min_pcnt, 3327 float min_pcnt,
3318 struct perf_env *env) 3328 struct perf_env *env,
3329 bool warn_lost_event)
3319{ 3330{
3320 int nr_entries = evlist->nr_entries; 3331 int nr_entries = evlist->nr_entries;
3321 3332
@@ -3325,7 +3336,7 @@ single_entry:
3325 3336
3326 return perf_evsel__hists_browse(first, nr_entries, help, 3337 return perf_evsel__hists_browse(first, nr_entries, help,
3327 false, hbt, min_pcnt, 3338 false, hbt, min_pcnt,
3328 env); 3339 env, warn_lost_event);
3329 } 3340 }
3330 3341
3331 if (symbol_conf.event_group) { 3342 if (symbol_conf.event_group) {
@@ -3342,5 +3353,6 @@ single_entry:
3342 } 3353 }
3343 3354
3344 return __perf_evlist__tui_browse_hists(evlist, nr_entries, help, 3355 return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
3345 hbt, min_pcnt, env); 3356 hbt, min_pcnt, env,
3357 warn_lost_event);
3346} 3358}
diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h
index ba431777f559..9428bee076f2 100644
--- a/tools/perf/ui/browsers/hists.h
+++ b/tools/perf/ui/browsers/hists.h
@@ -28,7 +28,8 @@ struct hist_browser {
28 28
29struct hist_browser *hist_browser__new(struct hists *hists); 29struct hist_browser *hist_browser__new(struct hists *hists);
30void hist_browser__delete(struct hist_browser *browser); 30void hist_browser__delete(struct hist_browser *browser);
31int hist_browser__run(struct hist_browser *browser, const char *help); 31int hist_browser__run(struct hist_browser *browser, const char *help,
32 bool warn_lost_event);
32void hist_browser__init(struct hist_browser *browser, 33void hist_browser__init(struct hist_browser *browser,
33 struct hists *hists); 34 struct hists *hists);
34#endif /* _PERF_UI_BROWSER_HISTS_H_ */ 35#endif /* _PERF_UI_BROWSER_HISTS_H_ */
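Why warn_lost_event is threaded all the way down: in overwrite mode the kernel intentionally discards old records, so PERF_RECORD_LOST is expected and the TUI warning would be noise. The caller side in builtin-top.c above therefore passes the inverted overwrite flag:

	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
				      top->min_percent,
				      &top->session->header.env,
				      !top->record_opts.overwrite);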
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ac35cd214feb..e5fc14e53c05 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -715,28 +715,11 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int
715 return perf_mmap__read_forward(md); 715 return perf_mmap__read_forward(md);
716} 716}
717 717
718union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
719{
720 struct perf_mmap *md = &evlist->mmap[idx];
721
722 /*
723 * No need to check messup for backward ring buffer:
724 * We can always read arbitrary long data from a backward
725 * ring buffer unless we forget to pause it before reading.
726 */
727 return perf_mmap__read_backward(md);
728}
729
730union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) 718union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
731{ 719{
732 return perf_evlist__mmap_read_forward(evlist, idx); 720 return perf_evlist__mmap_read_forward(evlist, idx);
733} 721}
734 722
735void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
736{
737 perf_mmap__read_catchup(&evlist->mmap[idx]);
738}
739
740void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) 723void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
741{ 724{
742 perf_mmap__consume(&evlist->mmap[idx], false); 725 perf_mmap__consume(&evlist->mmap[idx], false);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 75f8e0ad5d76..336b838e6957 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -133,10 +133,6 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
133 133
134union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, 134union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist,
135 int idx); 135 int idx);
136union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist,
137 int idx);
138void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
139
140void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx); 136void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
141 137
142int perf_evlist__open(struct perf_evlist *evlist); 138int perf_evlist__open(struct perf_evlist *evlist);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ff359c9ece2e..ef351688b797 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -41,17 +41,7 @@
41 41
42#include "sane_ctype.h" 42#include "sane_ctype.h"
43 43
44static struct { 44struct perf_missing_features perf_missing_features;
45 bool sample_id_all;
46 bool exclude_guest;
47 bool mmap2;
48 bool cloexec;
49 bool clockid;
50 bool clockid_wrong;
51 bool lbr_flags;
52 bool write_backward;
53 bool group_read;
54} perf_missing_features;
55 45
56static clockid_t clockid; 46static clockid_t clockid;
57 47
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 846e41644525..a7487c6d1866 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -149,6 +149,20 @@ union u64_swap {
149 u32 val32[2]; 149 u32 val32[2];
150}; 150};
151 151
152struct perf_missing_features {
153 bool sample_id_all;
154 bool exclude_guest;
155 bool mmap2;
156 bool cloexec;
157 bool clockid;
158 bool clockid_wrong;
159 bool lbr_flags;
160 bool write_backward;
161 bool group_read;
162};
163
164extern struct perf_missing_features perf_missing_features;
165
152struct cpu_map; 166struct cpu_map;
153struct target; 167struct target;
154struct thread_map; 168struct thread_map;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index f6630cb95eff..02721b579746 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -430,7 +430,8 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
430int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, 430int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
431 struct hist_browser_timer *hbt, 431 struct hist_browser_timer *hbt,
432 float min_pcnt, 432 float min_pcnt,
433 struct perf_env *env); 433 struct perf_env *env,
434 bool warn_lost_event);
434int script_browse(const char *script_opt); 435int script_browse(const char *script_opt);
435#else 436#else
436static inline 437static inline
@@ -438,7 +439,8 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
438 const char *help __maybe_unused, 439 const char *help __maybe_unused,
439 struct hist_browser_timer *hbt __maybe_unused, 440 struct hist_browser_timer *hbt __maybe_unused,
440 float min_pcnt __maybe_unused, 441 float min_pcnt __maybe_unused,
441 struct perf_env *env __maybe_unused) 442 struct perf_env *env __maybe_unused,
443 bool warn_lost_event __maybe_unused)
442{ 444{
443 return 0; 445 return 0;
444} 446}
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 05076e683938..91531a7c8fbf 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -22,29 +22,27 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
22 22
 23/* When check_messup is true, 'end' must point to a good entry */ 23/* When check_messup is true, 'end' must point to a good entry */
24static union perf_event *perf_mmap__read(struct perf_mmap *map, 24static union perf_event *perf_mmap__read(struct perf_mmap *map,
25 u64 start, u64 end, u64 *prev) 25 u64 *startp, u64 end)
26{ 26{
27 unsigned char *data = map->base + page_size; 27 unsigned char *data = map->base + page_size;
28 union perf_event *event = NULL; 28 union perf_event *event = NULL;
29 int diff = end - start; 29 int diff = end - *startp;
30 30
31 if (diff >= (int)sizeof(event->header)) { 31 if (diff >= (int)sizeof(event->header)) {
32 size_t size; 32 size_t size;
33 33
34 event = (union perf_event *)&data[start & map->mask]; 34 event = (union perf_event *)&data[*startp & map->mask];
35 size = event->header.size; 35 size = event->header.size;
36 36
37 if (size < sizeof(event->header) || diff < (int)size) { 37 if (size < sizeof(event->header) || diff < (int)size)
38 event = NULL; 38 return NULL;
39 goto broken_event;
40 }
41 39
42 /* 40 /*
43 * Event straddles the mmap boundary -- header should always 41 * Event straddles the mmap boundary -- header should always
44 * be inside due to u64 alignment of output. 42 * be inside due to u64 alignment of output.
45 */ 43 */
46 if ((start & map->mask) + size != ((start + size) & map->mask)) { 44 if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
47 unsigned int offset = start; 45 unsigned int offset = *startp;
48 unsigned int len = min(sizeof(*event), size), cpy; 46 unsigned int len = min(sizeof(*event), size), cpy;
49 void *dst = map->event_copy; 47 void *dst = map->event_copy;
50 48
@@ -59,20 +57,19 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
59 event = (union perf_event *)map->event_copy; 57 event = (union perf_event *)map->event_copy;
60 } 58 }
61 59
62 start += size; 60 *startp += size;
63 } 61 }
64 62
65broken_event:
66 if (prev)
67 *prev = start;
68
69 return event; 63 return event;
70} 64}
71 65
66/*
67 * legacy interface for mmap read.
68 * Don't use it. Use perf_mmap__read_event().
69 */
72union perf_event *perf_mmap__read_forward(struct perf_mmap *map) 70union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
73{ 71{
74 u64 head; 72 u64 head;
75 u64 old = map->prev;
76 73
77 /* 74 /*
78 * Check if event was unmapped due to a POLLHUP/POLLERR. 75 * Check if event was unmapped due to a POLLHUP/POLLERR.
@@ -82,13 +79,26 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
82 79
83 head = perf_mmap__read_head(map); 80 head = perf_mmap__read_head(map);
84 81
85 return perf_mmap__read(map, old, head, &map->prev); 82 return perf_mmap__read(map, &map->prev, head);
86} 83}
87 84
88union perf_event *perf_mmap__read_backward(struct perf_mmap *map) 85/*
 86 * Read events from the ring buffer one by one.
 87 * Return one event per call.
88 *
89 * Usage:
90 * perf_mmap__read_init()
91 * while(event = perf_mmap__read_event()) {
92 * //process the event
93 * perf_mmap__consume()
94 * }
95 * perf_mmap__read_done()
96 */
97union perf_event *perf_mmap__read_event(struct perf_mmap *map,
98 bool overwrite,
99 u64 *startp, u64 end)
89{ 100{
90 u64 head, end; 101 union perf_event *event;
91 u64 start = map->prev;
92 102
93 /* 103 /*
94 * Check if event was unmapped due to a POLLHUP/POLLERR. 104 * Check if event was unmapped due to a POLLHUP/POLLERR.
@@ -96,40 +106,19 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
96 if (!refcount_read(&map->refcnt)) 106 if (!refcount_read(&map->refcnt))
97 return NULL; 107 return NULL;
98 108
99 head = perf_mmap__read_head(map); 109 if (startp == NULL)
100 if (!head)
101 return NULL; 110 return NULL;
102 111
 103 /* 112 /* non-overwrite doesn't pause the ring buffer */
104 * 'head' pointer starts from 0. Kernel minus sizeof(record) form 113 if (!overwrite)
105 * it each time when kernel writes to it, so in fact 'head' is 114 end = perf_mmap__read_head(map);
106 * negative. 'end' pointer is made manually by adding the size of
107 * the ring buffer to 'head' pointer, means the validate data can
108 * read is the whole ring buffer. If 'end' is positive, the ring
109 * buffer has not fully filled, so we must adjust 'end' to 0.
110 *
111 * However, since both 'head' and 'end' is unsigned, we can't
112 * simply compare 'end' against 0. Here we compare '-head' and
113 * the size of the ring buffer, where -head is the number of bytes
114 * kernel write to the ring buffer.
115 */
116 if (-head < (u64)(map->mask + 1))
117 end = 0;
118 else
119 end = head + map->mask + 1;
120
121 return perf_mmap__read(map, start, end, &map->prev);
122}
123 115
124void perf_mmap__read_catchup(struct perf_mmap *map) 116 event = perf_mmap__read(map, startp, end);
125{
126 u64 head;
127 117
128 if (!refcount_read(&map->refcnt)) 118 if (!overwrite)
129 return; 119 map->prev = *startp;
130 120
131 head = perf_mmap__read_head(map); 121 return event;
132 map->prev = head;
133} 122}
134 123
135static bool perf_mmap__empty(struct perf_mmap *map) 124static bool perf_mmap__empty(struct perf_mmap *map)
@@ -267,41 +256,60 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u6
267 return -1; 256 return -1;
268} 257}
269 258
270int perf_mmap__push(struct perf_mmap *md, bool overwrite, 259/*
271 void *to, int push(void *to, void *buf, size_t size)) 260 * Report the start and end of the available data in ringbuffer
261 */
262int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
263 u64 *startp, u64 *endp)
272{ 264{
273 u64 head = perf_mmap__read_head(md); 265 u64 head = perf_mmap__read_head(md);
274 u64 old = md->prev; 266 u64 old = md->prev;
275 u64 end = head, start = old;
276 unsigned char *data = md->base + page_size; 267 unsigned char *data = md->base + page_size;
277 unsigned long size; 268 unsigned long size;
278 void *buf;
279 int rc = 0;
280 269
281 start = overwrite ? head : old; 270 *startp = overwrite ? head : old;
282 end = overwrite ? old : head; 271 *endp = overwrite ? old : head;
283 272
284 if (start == end) 273 if (*startp == *endp)
285 return 0; 274 return -EAGAIN;
286 275
287 size = end - start; 276 size = *endp - *startp;
288 if (size > (unsigned long)(md->mask) + 1) { 277 if (size > (unsigned long)(md->mask) + 1) {
289 if (!overwrite) { 278 if (!overwrite) {
290 WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); 279 WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
291 280
292 md->prev = head; 281 md->prev = head;
293 perf_mmap__consume(md, overwrite); 282 perf_mmap__consume(md, overwrite);
294 return 0; 283 return -EAGAIN;
295 } 284 }
296 285
297 /* 286 /*
298 * Backward ring buffer is full. We still have a chance to read 287 * Backward ring buffer is full. We still have a chance to read
 299 * most of the data from it. 288 * most of the data from it.
300 */ 289 */
301 if (overwrite_rb_find_range(data, md->mask, head, &start, &end)) 290 if (overwrite_rb_find_range(data, md->mask, head, startp, endp))
302 return -1; 291 return -EINVAL;
303 } 292 }
304 293
294 return 0;
295}
296
297int perf_mmap__push(struct perf_mmap *md, bool overwrite,
298 void *to, int push(void *to, void *buf, size_t size))
299{
300 u64 head = perf_mmap__read_head(md);
301 u64 end, start;
302 unsigned char *data = md->base + page_size;
303 unsigned long size;
304 void *buf;
305 int rc = 0;
306
307 rc = perf_mmap__read_init(md, overwrite, &start, &end);
308 if (rc < 0)
309 return (rc == -EAGAIN) ? 0 : -1;
310
311 size = end - start;
312
305 if ((start & md->mask) + size != (end & md->mask)) { 313 if ((start & md->mask) + size != (end & md->mask)) {
306 buf = &data[start & md->mask]; 314 buf = &data[start & md->mask];
307 size = md->mask + 1 - (start & md->mask); 315 size = md->mask + 1 - (start & md->mask);
@@ -327,3 +335,14 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
327out: 335out:
328 return rc; 336 return rc;
329} 337}
338
339/*
 340 * Mandatory for overwrite mode.
 341 * Overwrite mode reads the ring buffer backward.
 342 * The last perf_mmap__read() sets the tail to map->prev.
 343 * map->prev must be corrected to head, which is the end of the next read.
344 */
345void perf_mmap__read_done(struct perf_mmap *map)
346{
347 map->prev = perf_mmap__read_head(map);
348}
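Putting the new API together, a minimal consumer-loop sketch (assuming 'map' was already mmapped by the usual evlist setup; error handling trimmed):

	static void drain_one_mmap(struct perf_mmap *map, bool overwrite)
	{
		union perf_event *event;
		u64 start, end;

		/* -EAGAIN means the buffer is empty; nothing to do */
		if (perf_mmap__read_init(map, overwrite, &start, &end) < 0)
			return;

		while ((event = perf_mmap__read_event(map, overwrite, &start, end)) != NULL) {
			/* ... process 'event' ... */
			perf_mmap__consume(map, overwrite);
		}

		/* mandatory in overwrite mode: reset map->prev for the next read */
		perf_mmap__read_done(map);
	}

This mirrors how builtin-top.c and the backward-ring-buffer test are converted above.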
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index e43d7b55a55f..ec7d3a24e276 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -65,8 +65,6 @@ void perf_mmap__put(struct perf_mmap *map);
65 65
66void perf_mmap__consume(struct perf_mmap *map, bool overwrite); 66void perf_mmap__consume(struct perf_mmap *map, bool overwrite);
67 67
68void perf_mmap__read_catchup(struct perf_mmap *md);
69
70static inline u64 perf_mmap__read_head(struct perf_mmap *mm) 68static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
71{ 69{
72 struct perf_event_mmap_page *pc = mm->base; 70 struct perf_event_mmap_page *pc = mm->base;
@@ -87,11 +85,17 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
87} 85}
88 86
89union perf_event *perf_mmap__read_forward(struct perf_mmap *map); 87union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
90union perf_event *perf_mmap__read_backward(struct perf_mmap *map); 88
89union perf_event *perf_mmap__read_event(struct perf_mmap *map,
90 bool overwrite,
91 u64 *startp, u64 end);
91 92
92int perf_mmap__push(struct perf_mmap *md, bool backward, 93int perf_mmap__push(struct perf_mmap *md, bool backward,
93 void *to, int push(void *to, void *buf, size_t size)); 94 void *to, int push(void *to, void *buf, size_t size));
94 95
95size_t perf_mmap__mmap_len(struct perf_mmap *map); 96size_t perf_mmap__mmap_len(struct perf_mmap *map);
96 97
98int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
99 u64 *startp, u64 *endp);
100void perf_mmap__read_done(struct perf_mmap *map);
97#endif /*__PERF_MMAP_H */ 101#endif /*__PERF_MMAP_H */
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 443892dabedb..1019bbc5dbd8 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -340,35 +340,15 @@ size_t hex_width(u64 v)
340 return n; 340 return n;
341} 341}
342 342
343static int hex(char ch)
344{
345 if ((ch >= '0') && (ch <= '9'))
346 return ch - '0';
347 if ((ch >= 'a') && (ch <= 'f'))
348 return ch - 'a' + 10;
349 if ((ch >= 'A') && (ch <= 'F'))
350 return ch - 'A' + 10;
351 return -1;
352}
353
354/* 343/*
355 * While we find nice hex chars, build a long_val. 344 * While we find nice hex chars, build a long_val.
356 * Return number of chars processed. 345 * Return number of chars processed.
357 */ 346 */
358int hex2u64(const char *ptr, u64 *long_val) 347int hex2u64(const char *ptr, u64 *long_val)
359{ 348{
360 const char *p = ptr; 349 char *p;
361 *long_val = 0;
362
363 while (*p) {
364 const int hex_val = hex(*p);
365 350
366 if (hex_val < 0) 351 *long_val = strtoull(ptr, &p, 16);
367 break;
368
369 *long_val = (*long_val << 4) | hex_val;
370 p++;
371 }
372 352
373 return p - ptr; 353 return p - ptr;
374} 354}
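One behavioral nuance of the strtoull() rewrite: base-16 strtoull() also skips leading whitespace and accepts an optional "0x" prefix, so hex2u64() becomes slightly more permissive than the hand-rolled loop it replaces. Expected behavior, as a sketch:

	u64 v;
	int n;

	n = hex2u64("5a1f", &v);   /* n == 4, v == 0x5a1f */
	n = hex2u64("zz", &v);     /* no conversion: n == 0, v == 0 */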
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
new file mode 100755
index 000000000000..06b1d7cc12cc
--- /dev/null
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -0,0 +1,375 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# IPv4 and IPv6 onlink tests
5
6PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
7
8# Network interfaces
9# - odd in current namespace; even in peer ns
10declare -A NETIFS
11# default VRF
12NETIFS[p1]=veth1
13NETIFS[p2]=veth2
14NETIFS[p3]=veth3
15NETIFS[p4]=veth4
16# VRF
17NETIFS[p5]=veth5
18NETIFS[p6]=veth6
19NETIFS[p7]=veth7
20NETIFS[p8]=veth8
21
22# /24 network
23declare -A V4ADDRS
24V4ADDRS[p1]=169.254.1.1
25V4ADDRS[p2]=169.254.1.2
26V4ADDRS[p3]=169.254.3.1
27V4ADDRS[p4]=169.254.3.2
28V4ADDRS[p5]=169.254.5.1
29V4ADDRS[p6]=169.254.5.2
30V4ADDRS[p7]=169.254.7.1
31V4ADDRS[p8]=169.254.7.2
32
33# /64 network
34declare -A V6ADDRS
35V6ADDRS[p1]=2001:db8:101::1
36V6ADDRS[p2]=2001:db8:101::2
37V6ADDRS[p3]=2001:db8:301::1
38V6ADDRS[p4]=2001:db8:301::2
39V6ADDRS[p5]=2001:db8:501::1
40V6ADDRS[p6]=2001:db8:501::2
41V6ADDRS[p7]=2001:db8:701::1
42V6ADDRS[p8]=2001:db8:701::2
43
44# Test networks:
45# [1] = default table
46# [2] = VRF
47#
48# /32 host routes
49declare -A TEST_NET4
50TEST_NET4[1]=169.254.101
51TEST_NET4[2]=169.254.102
52# /128 host routes
53declare -A TEST_NET6
54TEST_NET6[1]=2001:db8:101
55TEST_NET6[2]=2001:db8:102
56
57# connected gateway
58CONGW[1]=169.254.1.254
59CONGW[2]=169.254.5.254
60
61# recursive gateway
62RECGW4[1]=169.254.11.254
63RECGW4[2]=169.254.12.254
64RECGW6[1]=2001:db8:11::64
65RECGW6[2]=2001:db8:12::64
66
67# for v4 mapped to v6
 68declare -A TEST_NET4IN6
69TEST_NET4IN6[1]=10.1.1.254
70TEST_NET4IN6[2]=10.2.1.254
71
72# mcast address
73MCAST6=ff02::1
74
75
76PEER_NS=bart
77PEER_CMD="ip netns exec ${PEER_NS}"
78VRF=lisa
79VRF_TABLE=1101
80PBR_TABLE=101
81
82################################################################################
83# utilities
84
85log_test()
86{
87 local rc=$1
88 local expected=$2
89 local msg="$3"
90
91 if [ ${rc} -eq ${expected} ]; then
92 nsuccess=$((nsuccess+1))
93 printf "\n TEST: %-50s [ OK ]\n" "${msg}"
94 else
95 nfail=$((nfail+1))
96 printf "\n TEST: %-50s [FAIL]\n" "${msg}"
97 if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
98 echo
99 echo "hit enter to continue, 'q' to quit"
100 read a
101 [ "$a" = "q" ] && exit 1
102 fi
103 fi
104}
105
106log_section()
107{
108 echo
109 echo "######################################################################"
110 echo "TEST SECTION: $*"
111 echo "######################################################################"
112}
113
114log_subsection()
115{
116 echo
117 echo "#########################################"
118 echo "TEST SUBSECTION: $*"
119}
120
121run_cmd()
122{
123 echo
124 echo "COMMAND: $*"
125 eval $*
126}
127
128get_linklocal()
129{
130 local dev=$1
131 local pfx
132 local addr
133
134 addr=$(${pfx} ip -6 -br addr show dev ${dev} | \
135 awk '{
136 for (i = 3; i <= NF; ++i) {
137 if ($i ~ /^fe80/)
138 print $i
139 }
140 }'
141 )
142 addr=${addr/\/*}
143
144 [ -z "$addr" ] && return 1
145
146 echo $addr
147
148 return 0
149}
150
151################################################################################
152#
153
154setup()
155{
156 echo
157 echo "########################################"
158 echo "Configuring interfaces"
159
160 set -e
161
162 # create namespace
163 ip netns add ${PEER_NS}
164 ip -netns ${PEER_NS} li set lo up
165
166 # add vrf table
167 ip li add ${VRF} type vrf table ${VRF_TABLE}
168 ip li set ${VRF} up
169 ip ro add table ${VRF_TABLE} unreachable default
170 ip -6 ro add table ${VRF_TABLE} unreachable default
171
172 # create test interfaces
173 ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
174 ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]}
175 ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]}
176 ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]}
177
178 # enslave vrf interfaces
179 for n in 5 7; do
180 ip li set ${NETIFS[p${n}]} vrf ${VRF}
181 done
182
183 # add addresses
184 for n in 1 3 5 7; do
185 ip li set ${NETIFS[p${n}]} up
186 ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
187 ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
188 done
189
190 # move peer interfaces to namespace and add addresses
191 for n in 2 4 6 8; do
192 ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
193 ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
194 ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
195 done
196
197 set +e
198
199 # let DAD complete - assume default of 1 probe
200 sleep 1
201}
202
203cleanup()
204{
205 # make sure we start from a clean slate
206 ip netns del ${PEER_NS} 2>/dev/null
207 for n in 1 3 5 7; do
208 ip link del ${NETIFS[p${n}]} 2>/dev/null
209 done
210 ip link del ${VRF} 2>/dev/null
211 ip ro flush table ${VRF_TABLE}
212 ip -6 ro flush table ${VRF_TABLE}
213}
214
215################################################################################
216# IPv4 tests
217#
218
219run_ip()
220{
221 local table="$1"
222 local prefix="$2"
223 local gw="$3"
224 local dev="$4"
225 local exp_rc="$5"
226 local desc="$6"
227
228 # dev arg may be empty
229 [ -n "${dev}" ] && dev="dev ${dev}"
230
231 run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink
232 log_test $? ${exp_rc} "${desc}"
233}
234
235valid_onlink_ipv4()
236{
237 # - unicast connected, unicast recursive
238 #
239 log_subsection "default VRF - main table"
240
241 run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
242 run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive"
243
244 log_subsection "VRF ${VRF}"
245
246 run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected"
247 run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
248
249 log_subsection "VRF device, PBR table"
250
251 run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected"
252 run_ip ${PBR_TABLE} ${TEST_NET4[2]}.4 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
253}
254
255invalid_onlink_ipv4()
256{
257 run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \
258 "Invalid gw - local unicast address"
259
260 run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \
261 "Invalid gw - local unicast address, VRF"
262
263 run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given"
264
265 run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \
266 "Gateway resolves to wrong nexthop device"
267
268 run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \
269 "Gateway resolves to wrong nexthop device - VRF"
270}
271
272################################################################################
273# IPv6 tests
274#
275
276run_ip6()
277{
278 local table="$1"
279 local prefix="$2"
280 local gw="$3"
281 local dev="$4"
282 local exp_rc="$5"
283 local desc="$6"
284
285 # dev arg may be empty
286 [ -n "${dev}" ] && dev="dev ${dev}"
287
288 run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink
289 log_test $? ${exp_rc} "${desc}"
290}
291
292valid_onlink_ipv6()
293{
294 # - unicast connected, unicast recursive, v4-mapped
295 #
296 log_subsection "default VRF - main table"
297
298 run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected"
299 run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive"
300 run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped"
301
302 log_subsection "VRF ${VRF}"
303
304 run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
305 run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
306 run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
307
308 log_subsection "VRF device, PBR table"
309
310 run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::4 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
311 run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::5 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
312 run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::6 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
313}
314
315invalid_onlink_ipv6()
316{
317 local lladdr
318
319 lladdr=$(get_linklocal ${NETIFS[p1]}) || return 1
320
321 run_ip6 254 ${TEST_NET6[1]}::11 ${V6ADDRS[p1]} ${NETIFS[p1]} 2 \
322 "Invalid gw - local unicast address"
323 run_ip6 254 ${TEST_NET6[1]}::12 ${lladdr} ${NETIFS[p1]} 2 \
324 "Invalid gw - local linklocal address"
325 run_ip6 254 ${TEST_NET6[1]}::12 ${MCAST6} ${NETIFS[p1]} 2 \
326 "Invalid gw - multicast address"
327
328 lladdr=$(get_linklocal ${NETIFS[p5]}) || return 1
329 run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::11 ${V6ADDRS[p5]} ${NETIFS[p5]} 2 \
330 "Invalid gw - local unicast address, VRF"
331 run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${lladdr} ${NETIFS[p5]} 2 \
332 "Invalid gw - local linklocal address, VRF"
333 run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${MCAST6} ${NETIFS[p5]} 2 \
334 "Invalid gw - multicast address, VRF"
335
336 run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \
337 "No nexthop device given"
338
339 # default VRF validation is done against LOCAL table
340 # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \
341 # "Gateway resolves to wrong nexthop device"
342
343 run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \
344 "Gateway resolves to wrong nexthop device - VRF"
345}
346
347run_onlink_tests()
348{
349 log_section "IPv4 onlink"
350 log_subsection "Valid onlink commands"
351 valid_onlink_ipv4
352 log_subsection "Invalid onlink commands"
353 invalid_onlink_ipv4
354
355 log_section "IPv6 onlink"
356 log_subsection "Valid onlink commands"
357 valid_onlink_ipv6
358 invalid_onlink_ipv6
359}
360
361################################################################################
362# main
363
364nsuccess=0
365nfail=0
366
367cleanup
368setup
369run_onlink_tests
370cleanup
371
372if [ "$TESTS" != "none" ]; then
373 printf "\nTests passed: %3d\n" ${nsuccess}
374 printf "Tests failed: %3d\n" ${nfail}
375fi
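As a usage sketch (the script needs root for the netns/VRF setup; PAUSE_ON_FAIL is the knob read at the top of the file):

	sudo PAUSE_ON_FAIL=yes ./fib-onlink-tests.sh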
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index a9154eefb2e2..b617985ecdc1 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -6,154 +6,155 @@
6 6
7ret=0 7ret=0
8 8
9check_err() 9PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
10{
11 if [ $ret -eq 0 ]; then
12 ret=$1
13 fi
14}
15 10
16check_fail() 11log_test()
17{ 12{
18 if [ $1 -eq 0 ]; then 13 local rc=$1
14 local expected=$2
15 local msg="$3"
16
17 if [ ${rc} -eq ${expected} ]; then
18 printf " %-60s [ OK ]\n" "${msg}"
19 else
19 ret=1 20 ret=1
21 printf " %-60s [FAIL]\n" "${msg}"
22 if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
23 echo
24 echo "hit enter to continue, 'q' to quit"
25 read a
26 [ "$a" = "q" ] && exit 1
27 fi
20 fi 28 fi
21} 29}
22 30
23netns_create() 31setup()
24{ 32{
25 local testns=$1 33 set -e
34 ip netns add testns
35 ip -netns testns link set dev lo up
36
37 ip -netns testns link add dummy0 type dummy
38 ip -netns testns link set dev dummy0 up
39 ip -netns testns address add 198.51.100.1/24 dev dummy0
40 ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
41 set +e
26 42
27 ip netns add $testns
28 ip netns exec $testns ip link set dev lo up
29} 43}
30 44
31fib_unreg_unicast_test() 45cleanup()
32{ 46{
33 ret=0 47 ip -netns testns link del dev dummy0 &> /dev/null
34 48 ip netns del testns
35 netns_create "testns" 49}
36
37 ip netns exec testns ip link add dummy0 type dummy
38 ip netns exec testns ip link set dev dummy0 up
39 50
40 ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 51fib_unreg_unicast_test()
41 ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 52{
53 echo
54 echo "Single path route test"
42 55
43 ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null 56 setup
44 check_err $?
45 ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
46 check_err $?
47 57
48 ip netns exec testns ip link del dev dummy0 58 echo " Start point"
49 check_err $? 59 ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
60 log_test $? 0 "IPv4 fibmatch"
61 ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
62 log_test $? 0 "IPv6 fibmatch"
50 63
51 ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null 64 set -e
52 check_fail $? 65 ip -netns testns link del dev dummy0
53 ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null 66 set +e
54 check_fail $?
55 67
56 ip netns del testns 68 echo " Nexthop device deleted"
69 ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
70 log_test $? 2 "IPv4 fibmatch - no route"
71 ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
72 log_test $? 2 "IPv6 fibmatch - no route"
57 73
58 if [ $ret -ne 0 ]; then 74 cleanup
59 echo "FAIL: unicast route test"
60 return 1
61 fi
62 echo "PASS: unicast route test"
63} 75}
64 76
65fib_unreg_multipath_test() 77fib_unreg_multipath_test()
66{ 78{
67 ret=0
68
69 netns_create "testns"
70 79
71 ip netns exec testns ip link add dummy0 type dummy 80 echo
72 ip netns exec testns ip link set dev dummy0 up 81 echo "Multipath route test"
73 82
74 ip netns exec testns ip link add dummy1 type dummy 83 setup
75 ip netns exec testns ip link set dev dummy1 up
76 84
77 ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 85 set -e
78 ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 86 ip -netns testns link add dummy1 type dummy
87 ip -netns testns link set dev dummy1 up
88 ip -netns testns address add 192.0.2.1/24 dev dummy1
89 ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
79 90
80 ip netns exec testns ip address add 192.0.2.1/24 dev dummy1 91 ip -netns testns route add 203.0.113.0/24 \
81 ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
82
83 ip netns exec testns ip route add 203.0.113.0/24 \
84 nexthop via 198.51.100.2 dev dummy0 \ 92 nexthop via 198.51.100.2 dev dummy0 \
85 nexthop via 192.0.2.2 dev dummy1 93 nexthop via 192.0.2.2 dev dummy1
86 ip netns exec testns ip -6 route add 2001:db8:3::/64 \ 94 ip -netns testns -6 route add 2001:db8:3::/64 \
87 nexthop via 2001:db8:1::2 dev dummy0 \ 95 nexthop via 2001:db8:1::2 dev dummy0 \
88 nexthop via 2001:db8:2::2 dev dummy1 96 nexthop via 2001:db8:2::2 dev dummy1
97 set +e
98
99 echo " Start point"
100 ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
101 log_test $? 0 "IPv4 fibmatch"
102 ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
103 log_test $? 0 "IPv6 fibmatch"
89 104
90 ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null 105 set -e
91 check_err $? 106 ip -netns testns link del dev dummy0
92 ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null 107 set +e
93 check_err $?
94 108
95 ip netns exec testns ip link del dev dummy0 109 echo " One nexthop device deleted"
96 check_err $? 110 ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
111 log_test $? 2 "IPv4 - multipath route removed on delete"
97 112
98 ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null 113 ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
99 check_fail $?
100 ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
101 # In IPv6 we do not flush the entire multipath route. 114 # In IPv6 we do not flush the entire multipath route.
102 check_err $? 115 log_test $? 0 "IPv6 - multipath down to single path"
103 116
104 ip netns exec testns ip link del dev dummy1 117 set -e
118 ip -netns testns link del dev dummy1
119 set +e
105 120
106 ip netns del testns 121 echo " Second nexthop device deleted"
122 ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
123 log_test $? 2 "IPv6 - no route"
107 124
108 if [ $ret -ne 0 ]; then 125 cleanup
109 echo "FAIL: multipath route test"
110 return 1
111 fi
112 echo "PASS: multipath route test"
113} 126}
114 127
115fib_unreg_test() 128fib_unreg_test()
116{ 129{
117 echo "Running netdev unregister tests"
118
119 fib_unreg_unicast_test 130 fib_unreg_unicast_test
120 fib_unreg_multipath_test 131 fib_unreg_multipath_test
121} 132}
122 133
123fib_down_unicast_test() 134fib_down_unicast_test()
124{ 135{
125 ret=0 136 echo
126 137 echo "Single path, admin down"
127 netns_create "testns"
128
129 ip netns exec testns ip link add dummy0 type dummy
130 ip netns exec testns ip link set dev dummy0 up
131
132 ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
133 ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
134 138
135 ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null 139 setup
136 check_err $?
137 ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
138 check_err $?
139 140
140 ip netns exec testns ip link set dev dummy0 down 141 echo " Start point"
141 check_err $? 142 ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
143 log_test $? 0 "IPv4 fibmatch"
144 ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
145 log_test $? 0 "IPv6 fibmatch"
142 146
143 ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null 147 set -e
144 check_fail $? 148 ip -netns testns link set dev dummy0 down
145 ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null 149 set +e
146 check_fail $?
147 150
148 ip netns exec testns ip link del dev dummy0 151 echo " Route deleted on down"
152 ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
153 log_test $? 2 "IPv4 fibmatch"
154 ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
155 log_test $? 2 "IPv6 fibmatch"
149 156
150 ip netns del testns 157 cleanup
151
152 if [ $ret -ne 0 ]; then
153 echo "FAIL: unicast route test"
154 return 1
155 fi
156 echo "PASS: unicast route test"
157} 158}
158 159
159fib_down_multipath_test_do() 160fib_down_multipath_test_do()
@@ -161,242 +162,229 @@ fib_down_multipath_test_do()
161 local down_dev=$1 162 local down_dev=$1
162 local up_dev=$2 163 local up_dev=$2
163 164
164 ip netns exec testns ip route get fibmatch 203.0.113.1 \ 165 ip -netns testns route get fibmatch 203.0.113.1 \
165 oif $down_dev &> /dev/null 166 oif $down_dev &> /dev/null
166 check_fail $? 167 log_test $? 2 "IPv4 fibmatch on down device"
167 ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \ 168 ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
168 oif $down_dev &> /dev/null 169 oif $down_dev &> /dev/null
169 check_fail $? 170 log_test $? 2 "IPv6 fibmatch on down device"
170 171
171 ip netns exec testns ip route get fibmatch 203.0.113.1 \ 172 ip -netns testns route get fibmatch 203.0.113.1 \
172 oif $up_dev &> /dev/null 173 oif $up_dev &> /dev/null
173 check_err $? 174 log_test $? 0 "IPv4 fibmatch on up device"
174 ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \ 175 ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
175 oif $up_dev &> /dev/null 176 oif $up_dev &> /dev/null
176 check_err $? 177 log_test $? 0 "IPv6 fibmatch on up device"
177 178
178 ip netns exec testns ip route get fibmatch 203.0.113.1 | \ 179 ip -netns testns route get fibmatch 203.0.113.1 | \
179 grep $down_dev | grep -q "dead linkdown" 180 grep $down_dev | grep -q "dead linkdown"
180 check_err $? 181 log_test $? 0 "IPv4 flags on down device"
181 ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \ 182 ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
182 grep $down_dev | grep -q "dead linkdown" 183 grep $down_dev | grep -q "dead linkdown"
183 check_err $? 184 log_test $? 0 "IPv6 flags on down device"
184 185
185 ip netns exec testns ip route get fibmatch 203.0.113.1 | \ 186 ip -netns testns route get fibmatch 203.0.113.1 | \
186 grep $up_dev | grep -q "dead linkdown" 187 grep $up_dev | grep -q "dead linkdown"
187 check_fail $? 188 log_test $? 1 "IPv4 flags on up device"
188 ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \ 189 ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
189 grep $up_dev | grep -q "dead linkdown" 190 grep $up_dev | grep -q "dead linkdown"
190 check_fail $? 191 log_test $? 1 "IPv6 flags on up device"
191} 192}
192 193
193fib_down_multipath_test() 194fib_down_multipath_test()
194{ 195{
195 ret=0 196 echo
197 echo "Admin down multipath"
196 198
197 netns_create "testns" 199 setup
198 200
199 ip netns exec testns ip link add dummy0 type dummy 201 set -e
200 ip netns exec testns ip link set dev dummy0 up 202 ip -netns testns link add dummy1 type dummy
203 ip -netns testns link set dev dummy1 up
201 204
202 ip netns exec testns ip link add dummy1 type dummy 205 ip -netns testns address add 192.0.2.1/24 dev dummy1
203 ip netns exec testns ip link set dev dummy1 up 206 ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
204 207
205 ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 208 ip -netns testns route add 203.0.113.0/24 \
206 ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
207
208 ip netns exec testns ip address add 192.0.2.1/24 dev dummy1
209 ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
210
211 ip netns exec testns ip route add 203.0.113.0/24 \
212 nexthop via 198.51.100.2 dev dummy0 \ 209 nexthop via 198.51.100.2 dev dummy0 \
213 nexthop via 192.0.2.2 dev dummy1 210 nexthop via 192.0.2.2 dev dummy1
214 ip netns exec testns ip -6 route add 2001:db8:3::/64 \ 211 ip -netns testns -6 route add 2001:db8:3::/64 \
215 nexthop via 2001:db8:1::2 dev dummy0 \ 212 nexthop via 2001:db8:1::2 dev dummy0 \
216 nexthop via 2001:db8:2::2 dev dummy1 213 nexthop via 2001:db8:2::2 dev dummy1
214 set +e
215
216 echo " Verify start point"
217 ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
218 log_test $? 0 "IPv4 fibmatch"
217 219
218 ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null 220 ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
219 check_err $? 221 log_test $? 0 "IPv6 fibmatch"
220 ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
221 check_err $?
222 222
223 ip netns exec testns ip link set dev dummy0 down 223 set -e
224 check_err $? 224 ip -netns testns link set dev dummy0 down
225 set +e
225 226
227 echo " One device down, one up"
226 fib_down_multipath_test_do "dummy0" "dummy1" 228 fib_down_multipath_test_do "dummy0" "dummy1"
227 229
228 ip netns exec testns ip link set dev dummy0 up 230 set -e
229 check_err $? 231 ip -netns testns link set dev dummy0 up
230 ip netns exec testns ip link set dev dummy1 down 232 ip -netns testns link set dev dummy1 down
231 check_err $? 233 set +e
232 234
235 echo " Other device down and up"
233 fib_down_multipath_test_do "dummy1" "dummy0" 236 fib_down_multipath_test_do "dummy1" "dummy0"
234 237
235 ip netns exec testns ip link set dev dummy0 down 238 set -e
236 check_err $? 239 ip -netns testns link set dev dummy0 down
240 set +e
237 241
238 ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null 242 echo " Both devices down"
239 check_fail $? 243 ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
240 ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null 244 log_test $? 2 "IPv4 fibmatch"
241 check_fail $? 245 ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
246 log_test $? 2 "IPv6 fibmatch"
242 247
243 ip netns exec testns ip link del dev dummy1 248 ip -netns testns link del dev dummy1
244 ip netns exec testns ip link del dev dummy0 249 cleanup
245
246 ip netns del testns
247
248 if [ $ret -ne 0 ]; then
249 echo "FAIL: multipath route test"
250 return 1
251 fi
252 echo "PASS: multipath route test"
253} 250}
254 251
255fib_down_test() 252fib_down_test()
256{ 253{
257 echo "Running netdev down tests"
258
259 fib_down_unicast_test 254 fib_down_unicast_test
260 fib_down_multipath_test 255 fib_down_multipath_test
261} 256}
262 257
258# Local routes should not be affected when carrier changes.
263fib_carrier_local_test() 259fib_carrier_local_test()
264{ 260{
265 ret=0 261 echo
262 echo "Local carrier tests - single path"
266 263
267 # Local routes should not be affected when carrier changes. 264 setup
268 netns_create "testns"
269 265
270 ip netns exec testns ip link add dummy0 type dummy 266 set -e
271 ip netns exec testns ip link set dev dummy0 up 267 ip -netns testns link set dev dummy0 carrier on
268 set +e
272 269
273 ip netns exec testns ip link set dev dummy0 carrier on 270 echo " Start point"
271 ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
272 log_test $? 0 "IPv4 fibmatch"
273 ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
274 log_test $? 0 "IPv6 fibmatch"
274 275
275 ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 276 ip -netns testns route get fibmatch 198.51.100.1 | \
276 ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
277
278 ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null
279 check_err $?
280 ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null
281 check_err $?
282
283 ip netns exec testns ip route get fibmatch 198.51.100.1 | \
284 grep -q "linkdown" 277 grep -q "linkdown"
285 check_fail $? 278 log_test $? 1 "IPv4 - no linkdown flag"
286 ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \ 279 ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
287 grep -q "linkdown" 280 grep -q "linkdown"
288 check_fail $? 281 log_test $? 1 "IPv6 - no linkdown flag"
289 282
290 ip netns exec testns ip link set dev dummy0 carrier off 283 set -e
284 ip -netns testns link set dev dummy0 carrier off
285 sleep 1
286 set +e
291 287
292 ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null 288 echo " Carrier off on nexthop"
293 check_err $? 289 ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
294 ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null 290 log_test $? 0 "IPv4 fibmatch"
295 check_err $? 291 ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
292 log_test $? 0 "IPv6 fibmatch"
296 293
297 ip netns exec testns ip route get fibmatch 198.51.100.1 | \ 294 ip -netns testns route get fibmatch 198.51.100.1 | \
298 grep -q "linkdown" 295 grep -q "linkdown"
299 check_fail $? 296 log_test $? 1 "IPv4 - linkdown flag set"
300 ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \ 297 ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
301 grep -q "linkdown" 298 grep -q "linkdown"
302 check_fail $? 299 log_test $? 1 "IPv6 - linkdown flag set"
303 300
304 ip netns exec testns ip address add 192.0.2.1/24 dev dummy0 301 set -e
305 ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0 302 ip -netns testns address add 192.0.2.1/24 dev dummy0
303 ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
304 set +e
306 305
307 ip netns exec testns ip route get fibmatch 192.0.2.1 &> /dev/null 306 echo " Route to local address with carrier down"
308 check_err $? 307 ip -netns testns route get fibmatch 192.0.2.1 &> /dev/null
309 ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 &> /dev/null 308 log_test $? 0 "IPv4 fibmatch"
310 check_err $? 309 ip -netns testns -6 route get fibmatch 2001:db8:2::1 &> /dev/null
310 log_test $? 0 "IPv6 fibmatch"
311 311
312 ip netns exec testns ip route get fibmatch 192.0.2.1 | \ 312 ip -netns testns route get fibmatch 192.0.2.1 | \
313 grep -q "linkdown" 313 grep -q "linkdown"
314 check_fail $? 314 log_test $? 1 "IPv4 linkdown flag set"
315 ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 | \ 315 ip -netns testns -6 route get fibmatch 2001:db8:2::1 | \
316 grep -q "linkdown" 316 grep -q "linkdown"
317 check_fail $? 317 log_test $? 1 "IPv6 linkdown flag set"
318 318
319 ip netns exec testns ip link del dev dummy0 319 cleanup
320
321 ip netns del testns
322
323 if [ $ret -ne 0 ]; then
324 echo "FAIL: local route carrier test"
325 return 1
326 fi
327 echo "PASS: local route carrier test"
328} 320}
329 321
330fib_carrier_unicast_test() 322fib_carrier_unicast_test()
331{ 323{
332 ret=0 324 ret=0
333 325
334 netns_create "testns" 326 echo
335 327 echo "Single path route carrier test"
336 ip netns exec testns ip link add dummy0 type dummy
337 ip netns exec testns ip link set dev dummy0 up
338 328
339 ip netns exec testns ip link set dev dummy0 carrier on 329 setup
340 330
341 ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 331 set -e
342 ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 332 ip -netns testns link set dev dummy0 carrier on
333 set +e
343 334
344 ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null 335 echo " Start point"
345 check_err $? 336 ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
346 ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null 337 log_test $? 0 "IPv4 fibmatch"
347 check_err $? 338 ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
339 log_test $? 0 "IPv6 fibmatch"
348 340
349 ip netns exec testns ip route get fibmatch 198.51.100.2 | \ 341 ip -netns testns route get fibmatch 198.51.100.2 | \
350 grep -q "linkdown" 342 grep -q "linkdown"
351 check_fail $? 343 log_test $? 1 "IPv4 no linkdown flag"
352 ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \ 344 ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
353 grep -q "linkdown" 345 grep -q "linkdown"
354 check_fail $? 346 log_test $? 1 "IPv6 no linkdown flag"
355 347
356 ip netns exec testns ip link set dev dummy0 carrier off 348 set -e
349 ip -netns testns link set dev dummy0 carrier off
350 set +e
357 351
358 ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null 352 echo " Carrier down"
359 check_err $? 353 ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
360 ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null 354 log_test $? 0 "IPv4 fibmatch"
361 check_err $? 355 ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
356 log_test $? 0 "IPv6 fibmatch"
362 357
363 ip netns exec testns ip route get fibmatch 198.51.100.2 | \ 358 ip -netns testns route get fibmatch 198.51.100.2 | \
364 grep -q "linkdown" 359 grep -q "linkdown"
365 check_err $? 360 log_test $? 0 "IPv4 linkdown flag set"
366 ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \ 361 ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
367 grep -q "linkdown" 362 grep -q "linkdown"
368 check_err $? 363 log_test $? 0 "IPv6 linkdown flag set"
369 364
370 ip netns exec testns ip address add 192.0.2.1/24 dev dummy0 365 set -e
371 ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0 366 ip -netns testns address add 192.0.2.1/24 dev dummy0
367 ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
368 set +e
372 369
373 ip netns exec testns ip route get fibmatch 192.0.2.2 &> /dev/null 370 echo " Second address added with carrier down"
374 check_err $? 371 ip -netns testns route get fibmatch 192.0.2.2 &> /dev/null
375 ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 &> /dev/null 372 log_test $? 0 "IPv4 fibmatch"
376 check_err $? 373 ip -netns testns -6 route get fibmatch 2001:db8:2::2 &> /dev/null
374 log_test $? 0 "IPv6 fibmatch"
377 375
378 ip netns exec testns ip route get fibmatch 192.0.2.2 | \ 376 ip -netns testns route get fibmatch 192.0.2.2 | \
379 grep -q "linkdown" 377 grep -q "linkdown"
380 check_err $? 378 log_test $? 0 "IPv4 linkdown flag set"
381 ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 | \ 379 ip -netns testns -6 route get fibmatch 2001:db8:2::2 | \
382 grep -q "linkdown" 380 grep -q "linkdown"
383 check_err $? 381 log_test $? 0 "IPv6 linkdown flag set"
384 382
385 ip netns exec testns ip link del dev dummy0 383 cleanup
386
387 ip netns del testns
388
389 if [ $ret -ne 0 ]; then
390 echo "FAIL: unicast route carrier test"
391 return 1
392 fi
393 echo "PASS: unicast route carrier test"
394} 384}
395 385
396fib_carrier_test() 386fib_carrier_test()
397{ 387{
398 echo "Running netdev carrier change tests"
399
400 fib_carrier_local_test 388 fib_carrier_local_test
401 fib_carrier_unicast_test 389 fib_carrier_unicast_test
402} 390}
@@ -424,6 +412,9 @@ if [ $? -ne 0 ]; then
424 exit 0 412 exit 0
425fi 413fi
426 414
415# start clean
416cleanup &> /dev/null
417
427fib_test 418fib_test
428 419
429exit $ret 420exit $ret
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index e11fe84de0fd..5cc2a53bb71c 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -14,6 +14,9 @@
14 * - SOCK_DGRAM 14 * - SOCK_DGRAM
15 * - SOCK_RAW 15 * - SOCK_RAW
16 * 16 *
17 * PF_RDS
18 * - SOCK_SEQPACKET
19 *
17 * Start this program on two connected hosts, one in send mode and 20 * Start this program on two connected hosts, one in send mode and
18 * the other with option '-r' to put it in receiver mode. 21 * the other with option '-r' to put it in receiver mode.
19 * 22 *
@@ -53,6 +56,7 @@
53#include <sys/types.h> 56#include <sys/types.h>
54#include <sys/wait.h> 57#include <sys/wait.h>
55#include <unistd.h> 58#include <unistd.h>
59#include <linux/rds.h>
56 60
57#ifndef SO_EE_ORIGIN_ZEROCOPY 61#ifndef SO_EE_ORIGIN_ZEROCOPY
58#define SO_EE_ORIGIN_ZEROCOPY 5 62#define SO_EE_ORIGIN_ZEROCOPY 5
@@ -164,17 +168,39 @@ static int do_accept(int fd)
164 return fd; 168 return fd;
165} 169}
166 170
167static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy) 171static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
172{
173 struct cmsghdr *cm;
174
175 if (!msg->msg_control)
176 error(1, errno, "NULL cookie");
177 cm = (void *)msg->msg_control;
178 cm->cmsg_len = CMSG_LEN(sizeof(cookie));
179 cm->cmsg_level = SOL_RDS;
180 cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
181 memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
182}
183
184static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
168{ 185{
169 int ret, len, i, flags; 186 int ret, len, i, flags;
187 static uint32_t cookie;
188 char ckbuf[CMSG_SPACE(sizeof(cookie))];
170 189
171 len = 0; 190 len = 0;
172 for (i = 0; i < msg->msg_iovlen; i++) 191 for (i = 0; i < msg->msg_iovlen; i++)
173 len += msg->msg_iov[i].iov_len; 192 len += msg->msg_iov[i].iov_len;
174 193
175 flags = MSG_DONTWAIT; 194 flags = MSG_DONTWAIT;
176 if (do_zerocopy) 195 if (do_zerocopy) {
177 flags |= MSG_ZEROCOPY; 196 flags |= MSG_ZEROCOPY;
197 if (domain == PF_RDS) {
198 memset(ckbuf, 0, sizeof(ckbuf));
199 msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
200 msg->msg_control = (struct cmsghdr *)ckbuf;
201 add_zcopy_cookie(msg, ++cookie);
202 }
203 }
178 204
179 ret = sendmsg(fd, msg, flags); 205 ret = sendmsg(fd, msg, flags);
180 if (ret == -1 && errno == EAGAIN) 206 if (ret == -1 && errno == EAGAIN)
@@ -190,6 +216,10 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
190 if (do_zerocopy && ret) 216 if (do_zerocopy && ret)
191 expected_completions++; 217 expected_completions++;
192 } 218 }
219 if (do_zerocopy && domain == PF_RDS) {
220 msg->msg_control = NULL;
221 msg->msg_controllen = 0;
222 }
193 223
194 return true; 224 return true;
195} 225}
@@ -216,7 +246,9 @@ static void do_sendmsg_corked(int fd, struct msghdr *msg)
216 msg->msg_iov[0].iov_len = payload_len + extra_len; 246 msg->msg_iov[0].iov_len = payload_len + extra_len;
217 extra_len = 0; 247 extra_len = 0;
218 248
219 do_sendmsg(fd, msg, do_zerocopy); 249 do_sendmsg(fd, msg, do_zerocopy,
250 (cfg_dst_addr.ss_family == AF_INET ?
251 PF_INET : PF_INET6));
220 } 252 }
221 253
222 do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); 254 do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
@@ -300,13 +332,38 @@ static int do_setup_tx(int domain, int type, int protocol)
300 if (cfg_zerocopy) 332 if (cfg_zerocopy)
301 do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1); 333 do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
302 334
303 if (domain != PF_PACKET) 335 if (domain != PF_PACKET && domain != PF_RDS)
304 if (connect(fd, (void *) &cfg_dst_addr, cfg_alen)) 336 if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
305 error(1, errno, "connect"); 337 error(1, errno, "connect");
306 338
339 if (domain == PF_RDS) {
340 if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
341 error(1, errno, "bind");
342 }
343
307 return fd; 344 return fd;
308} 345}
309 346
347static int do_process_zerocopy_cookies(struct sock_extended_err *serr,
348 uint32_t *ckbuf, size_t nbytes)
349{
350 int ncookies, i;
351
352 if (serr->ee_errno != 0)
353 error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
354 ncookies = serr->ee_data;
355 if (ncookies > SO_EE_ORIGIN_MAX_ZCOOKIES)
356 error(1, 0, "Returned %d cookies, max expected %d\n",
357 ncookies, SO_EE_ORIGIN_MAX_ZCOOKIES);
358 if (nbytes != ncookies * sizeof(uint32_t))
359 error(1, 0, "Expected %d cookies, got %ld\n",
360 ncookies, nbytes/sizeof(uint32_t));
361 for (i = 0; i < ncookies; i++)
362 if (cfg_verbose >= 2)
363 fprintf(stderr, "%d\n", ckbuf[i]);
364 return ncookies;
365}
366
310static bool do_recv_completion(int fd) 367static bool do_recv_completion(int fd)
311{ 368{
312 struct sock_extended_err *serr; 369 struct sock_extended_err *serr;
@@ -315,10 +372,17 @@ static bool do_recv_completion(int fd)
315 uint32_t hi, lo, range; 372 uint32_t hi, lo, range;
316 int ret, zerocopy; 373 int ret, zerocopy;
317 char control[100]; 374 char control[100];
375 uint32_t ckbuf[SO_EE_ORIGIN_MAX_ZCOOKIES];
376 struct iovec iov;
318 377
319 msg.msg_control = control; 378 msg.msg_control = control;
320 msg.msg_controllen = sizeof(control); 379 msg.msg_controllen = sizeof(control);
321 380
381 iov.iov_base = ckbuf;
382 iov.iov_len = (SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(ckbuf[0]));
383 msg.msg_iov = &iov;
384 msg.msg_iovlen = 1;
385
322 ret = recvmsg(fd, &msg, MSG_ERRQUEUE); 386 ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
323 if (ret == -1 && errno == EAGAIN) 387 if (ret == -1 && errno == EAGAIN)
324 return false; 388 return false;
@@ -337,6 +401,11 @@ static bool do_recv_completion(int fd)
337 cm->cmsg_level, cm->cmsg_type); 401 cm->cmsg_level, cm->cmsg_type);
338 402
339 serr = (void *) CMSG_DATA(cm); 403 serr = (void *) CMSG_DATA(cm);
404
405 if (serr->ee_origin == SO_EE_ORIGIN_ZCOOKIE) {
406 completions += do_process_zerocopy_cookies(serr, ckbuf, ret);
407 return true;
408 }
340 if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) 409 if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
341 error(1, 0, "serr: wrong origin: %u", serr->ee_origin); 410 error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
342 if (serr->ee_errno != 0) 411 if (serr->ee_errno != 0)
@@ -444,6 +513,13 @@ static void do_tx(int domain, int type, int protocol)
444 msg.msg_iovlen++; 513 msg.msg_iovlen++;
445 } 514 }
446 515
516 if (domain == PF_RDS) {
517 msg.msg_name = &cfg_dst_addr;
518 msg.msg_namelen = (cfg_dst_addr.ss_family == AF_INET ?
519 sizeof(struct sockaddr_in) :
520 sizeof(struct sockaddr_in6));
521 }
522
447 iov[2].iov_base = payload; 523 iov[2].iov_base = payload;
448 iov[2].iov_len = cfg_payload_len; 524 iov[2].iov_len = cfg_payload_len;
449 msg.msg_iovlen++; 525 msg.msg_iovlen++;
@@ -454,7 +530,7 @@ static void do_tx(int domain, int type, int protocol)
454 if (cfg_cork) 530 if (cfg_cork)
455 do_sendmsg_corked(fd, &msg); 531 do_sendmsg_corked(fd, &msg);
456 else 532 else
457 do_sendmsg(fd, &msg, cfg_zerocopy); 533 do_sendmsg(fd, &msg, cfg_zerocopy, domain);
458 534
459 while (!do_poll(fd, POLLOUT)) { 535 while (!do_poll(fd, POLLOUT)) {
460 if (cfg_zerocopy) 536 if (cfg_zerocopy)
@@ -555,6 +631,40 @@ static void do_flush_datagram(int fd, int type)
555 bytes += cfg_payload_len; 631 bytes += cfg_payload_len;
556} 632}
557 633
634
635static void do_recvmsg(int fd)
636{
637 int ret, off = 0;
638 char *buf;
639 struct iovec iov;
640 struct msghdr msg;
641 struct sockaddr_storage din;
642
643 buf = calloc(cfg_payload_len, sizeof(char));
644 iov.iov_base = buf;
645 iov.iov_len = cfg_payload_len;
646
647 memset(&msg, 0, sizeof(msg));
648 msg.msg_name = &din;
649 msg.msg_namelen = sizeof(din);
650 msg.msg_iov = &iov;
651 msg.msg_iovlen = 1;
652
653 ret = recvmsg(fd, &msg, MSG_TRUNC);
654
655 if (ret == -1)
656 error(1, errno, "recv");
657 if (ret != cfg_payload_len)
658 error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
659
660 if (memcmp(buf + off, payload, ret))
661 error(1, 0, "recv: data mismatch");
662
663 free(buf);
664 packets++;
665 bytes += cfg_payload_len;
666}
667
558static void do_rx(int domain, int type, int protocol) 668static void do_rx(int domain, int type, int protocol)
559{ 669{
560 uint64_t tstop; 670 uint64_t tstop;
@@ -566,6 +676,8 @@ static void do_rx(int domain, int type, int protocol)
566 do { 676 do {
567 if (type == SOCK_STREAM) 677 if (type == SOCK_STREAM)
568 do_flush_tcp(fd); 678 do_flush_tcp(fd);
679 else if (domain == PF_RDS)
680 do_recvmsg(fd);
569 else 681 else
570 do_flush_datagram(fd, type); 682 do_flush_datagram(fd, type);
571 683
@@ -610,6 +722,7 @@ static void parse_opts(int argc, char **argv)
610 40 /* max tcp options */; 722 40 /* max tcp options */;
611 int c; 723 int c;
612 char *daddr = NULL, *saddr = NULL; 724 char *daddr = NULL, *saddr = NULL;
725 char *cfg_test;
613 726
614 cfg_payload_len = max_payload_len; 727 cfg_payload_len = max_payload_len;
615 728
@@ -667,6 +780,14 @@ static void parse_opts(int argc, char **argv)
667 break; 780 break;
668 } 781 }
669 } 782 }
783
784 cfg_test = argv[argc - 1];
785 if (strcmp(cfg_test, "rds") == 0) {
786 if (!daddr)
787 error(1, 0, "-D <server addr> required for PF_RDS\n");
788 if (!cfg_rx && !saddr)
789 error(1, 0, "-S <client addr> required for PF_RDS\n");
790 }
670 setup_sockaddr(cfg_family, daddr, &cfg_dst_addr); 791 setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
671 setup_sockaddr(cfg_family, saddr, &cfg_src_addr); 792 setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
672 793
@@ -699,6 +820,8 @@ int main(int argc, char **argv)
699 do_test(cfg_family, SOCK_STREAM, 0); 820 do_test(cfg_family, SOCK_STREAM, 0);
700 else if (!strcmp(cfg_test, "udp")) 821 else if (!strcmp(cfg_test, "udp"))
701 do_test(cfg_family, SOCK_DGRAM, 0); 822 do_test(cfg_family, SOCK_DGRAM, 0);
823 else if (!strcmp(cfg_test, "rds"))
824 do_test(PF_RDS, SOCK_SEQPACKET, 0);
702 else 825 else
703 error(1, 0, "unknown cfg_test %s", cfg_test); 826 error(1, 0, "unknown cfg_test %s", cfg_test);
704 827
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 989f917068d1..d4346b16b2c1 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -128,6 +128,8 @@ static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
128 128
129static void sock_fanout_set_ebpf(int fd) 129static void sock_fanout_set_ebpf(int fd)
130{ 130{
131 static char log_buf[65536];
132
131 const int len_off = __builtin_offsetof(struct __sk_buff, len); 133 const int len_off = __builtin_offsetof(struct __sk_buff, len);
132 struct bpf_insn prog[] = { 134 struct bpf_insn prog[] = {
133 { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 }, 135 { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 },
@@ -140,7 +142,6 @@ static void sock_fanout_set_ebpf(int fd)
140 { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 }, 142 { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 },
141 { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } 143 { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
142 }; 144 };
143 char log_buf[512];
144 union bpf_attr attr; 145 union bpf_attr attr;
145 int pfd; 146 int pfd;
146 147
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 39fd362415cf..0f2698f9fd6d 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -57,7 +57,7 @@ volatile int gotsig;
57 57
58void sighandler(int sig, siginfo_t *info, void *ctx) 58void sighandler(int sig, siginfo_t *info, void *ctx)
59{ 59{
60 struct ucontext *ucp = ctx; 60 ucontext_t *ucp = ctx;
61 61
62 if (!testing) { 62 if (!testing) {
63 signal(sig, SIG_DFL); 63 signal(sig, SIG_DFL);
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index 970ff294fec8..3a0336782d2d 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -14,11 +14,11 @@ REQUIREMENTS
14 14
15* The kernel must have network namespace support 15* The kernel must have network namespace support
16 16
17* The kernel must have veth support available, as a veth pair is created 17* The kernel must have veth support available, as a veth pair is created
18 prior to running the tests. 18 prior to running the tests.
19 19
20* All tc-related features must be built in or available as modules. 20* All tc-related features being tested must be built in or available as
21 To check what is required in current setup run: 21 modules. To check what is required in current setup run:
22 ./tdc.py -c 22 ./tdc.py -c
23 23
24 Note: 24 Note:
@@ -44,10 +44,13 @@ using the -p option when running tdc:
44RUNNING TDC 44RUNNING TDC
45----------- 45-----------
46 46
47To use tdc, root privileges are required. tdc will not run otherwise. 47To use tdc, root privileges are required. This is because the
48commands being tested must be run as root. The code that enforces
49execution by root uid has been moved into a plugin (see PLUGIN
50ARCHITECTURE, below).
48 51
49All tests are executed inside a network namespace to prevent conflicts 52If nsPlugin is linked, all tests are executed inside a network
50within the host. 53namespace to prevent conflicts within the host.
51 54
52Running tdc without any arguments will run all tests. Refer to the section 55Running tdc without any arguments will run all tests. Refer to the section
53on command line arguments for more information, or run: 56on command line arguments for more information, or run:
@@ -59,6 +62,33 @@ output captured from the failing test will be printed immediately following
59the failed test in the TAP output. 62the failed test in the TAP output.
60 63
61 64
65OVERVIEW OF TDC EXECUTION
66-------------------------
67
68One run of tests is considered a "test suite" (this will be refined in the
69future). A test suite has one or more test cases in it.
70
71A test case has four stages:
72
73 - setup
74 - execute
75 - verify
76 - teardown
77
78The setup and teardown stages can run zero or more commands. The setup
79stage does some setup if the test needs it. The teardown stage undoes
80the setup and returns the system to a "neutral" state so any other test
81can be run next. These two stages require any commands run to return
82success, but do not otherwise verify the results.
83
84The execute and verify stages each run one command. The execute stage
85tests the return code against one or more acceptable values. The
86verify stage checks the return code for success, and also compares
87the stdout with a regular expression.
88
89Each of the commands in any stage will run in a shell instance.
90
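As a rough illustration (a sketch, not tdc's actual code), the verify
stage boils down to something like this, where the expected match
pattern comes from the test case:

    import re, subprocess

    def verify(command, match_pattern):
        # run the verify command in a shell instance, as tdc does
        proc = subprocess.run(command, shell=True,
                              capture_output=True, text=True)
        if proc.returncode != 0:
            return False        # verify requires a successful return code
        # the match pattern is applied to stdout as a regular expression
        return re.search(match_pattern, proc.stdout) is not None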
91
62USER-DEFINED CONSTANTS 92USER-DEFINED CONSTANTS
63---------------------- 93----------------------
64 94
@@ -70,23 +100,132 @@ executed as part of the test. More will be added as test cases require.
70Example: 100Example:
71 $TC qdisc add dev $DEV1 ingress 101 $TC qdisc add dev $DEV1 ingress
72 102
103The NAMES values are used to substitute into the commands in the test cases.
104
73 105
74COMMAND LINE ARGUMENTS 106COMMAND LINE ARGUMENTS
75---------------------- 107----------------------
76 108
77Run tdc.py -h to see the full list of available arguments. 109Run tdc.py -h to see the full list of available arguments.
78 110
79-p PATH Specify the tc executable located at PATH to be used on this 111usage: tdc.py [-h] [-p PATH] [-D DIR [DIR ...]] [-f FILE [FILE ...]]
80 test run 112 [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v]
81-c Show the available test case categories in this test file 113 [-d DEVICE] [-n NS] [-V]
82-c CATEGORY Run only tests that belong to CATEGORY 114
83-f FILE Read test cases from the JSON file named FILE 115Linux TC unit tests
84-l [CATEGORY] List all test cases in the JSON file. If CATEGORY is 116
85 specified, list test cases matching that category. 117optional arguments:
86-s ID Show the test case matching ID 118 -h, --help show this help message and exit
87-e ID Execute the test case identified by ID 119 -p PATH, --path PATH The full path to the tc executable to use
88-i Generate unique ID numbers for test cases with no existing 120 -v, --verbose Show the commands that are being run
89 ID number 121 -d DEVICE, --device DEVICE
122 Execute the test case in flower category
123
124selection:
125 select which test cases: files plus directories; filtered by categories
126 plus testids
127
128 -D DIR [DIR ...], --directory DIR [DIR ...]
129 Collect tests from the specified directory(ies)
130 (default [tc-tests])
131 -f FILE [FILE ...], --file FILE [FILE ...]
132 Run tests from the specified file(s)
133 -c [CATG [CATG ...]], --category [CATG [CATG ...]]
134 Run tests only from the specified category/ies, or if
135 no category/ies is/are specified, list known
136 categories.
137 -e ID [ID ...], --execute ID [ID ...]
138 Execute the specified test cases with specified IDs
139
140action:
141 select action to perform on selected test cases
142
143 -l, --list List all test cases, or those only within the
144 specified category
145 -s, --show Display the selected test cases
146 -i, --id Generate ID numbers for new test cases
147
148netns:
149 options for nsPlugin(run commands in net namespace)
150
151 -n NS, --namespace NS
152 Run commands in namespace NS
153
154valgrind:
155 options for valgrindPlugin (run command under test under Valgrind)
156
157 -V, --valgrind Run commands under valgrind
158
159
160PLUGIN ARCHITECTURE
161-------------------
162
163There is now a plugin architecture, and some of the functionality that
164was in the tdc.py script has been moved into the plugins.
165
166The plugins are in the directory plugin-lib. They are executed from
167the directory plugins. Put symbolic links from plugins to plugin-lib,
168and name them according to the order you want them to run.
169
170Example:
171
172bjb@bee:~/work/tc-testing$ ls -l plugins
173total 4
174lrwxrwxrwx 1 bjb bjb 27 Oct 4 16:12 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
175lrwxrwxrwx 1 bjb bjb 25 Oct 12 17:55 20-nsPlugin.py -> ../plugin-lib/nsPlugin.py
176-rwxr-xr-x 1 bjb bjb 0 Sep 29 15:56 __init__.py
177
178Each plugin is a subclass of TdcPlugin, defined in TdcPlugin.py, and
179must be called "SubPlugin" so tdc can find it. The plugins are
180distinguished from each other in the python program by their module
181name.
182
183This base class supplies "hooks" to run extra functions. These hooks are as follows:
184
185pre- and post-suite
186pre- and post-case
187pre- and post-execute stage
188adjust-command (runs in all stages and receives the stage name)
189
190The pre-suite hook receives the number of tests and an array of test ids.
191This allows you to dump out the list of skipped tests in the event of a
192failure during setup or teardown stage.
193
194The pre-case hook receives the ordinal number and test id of the current test.
195
196The adjust-command hook receives the stage id (see list below) and the
197full command to be executed. This allows for last-minute adjustment
198of the command.
199
200The stages are identified by the following strings:
201
202 - pre (pre-suite)
203 - setup
204 - command
205 - verify
206 - teardown
207 - post (post-suite)
208
209
210To write a plugin, you need to inherit from TdcPlugin in
211TdcPlugin.py. To use the plugin, you have to put the
212implementation file in plugin-lib, and add a symbolic link to it from
213plugins. It will be detected at run time and invoked at the
214appropriate times. There are a few examples in the plugin-lib
215directory:
216
217 - rootPlugin.py:
218 implements the enforcement of running as root
219 - nsPlugin.py:
220 sets up a network namespace and runs all commands in that namespace
221 - valgrindPlugin.py
222 runs each command in the execute stage under valgrind,
223 and checks for leaks.
224   This plugin will output an extra test for each test in the test file:
225   alongside the existing result of whether the test passed or failed,
226   it adds a test for whether the command leaked memory.
227 (This one is a preliminary version, it may not work quite right yet,
228 but the overall template is there and it should only need tweaks.)
90 229
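For orientation, a minimal plugin is little more than the shell below
(a sketch; the module/plugin name is hypothetical):

    from TdcPlugin import TdcPlugin

    class SubPlugin(TdcPlugin):
        def __init__(self):
            # sub_class must be set before calling the base __init__
            self.sub_class = 'example/SubPlugin'
            super().__init__()

        def pre_suite(self, testcount, testidlist):
            # the base class saves testcount and testidlist for later use
            super().pre_suite(testcount, testidlist)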
91 230
92ACKNOWLEDGEMENTS 231ACKNOWLEDGEMENTS
diff --git a/tools/testing/selftests/tc-testing/TODO.txt b/tools/testing/selftests/tc-testing/TODO.txt
index 6a266d811a78..c40698557e2f 100644
--- a/tools/testing/selftests/tc-testing/TODO.txt
+++ b/tools/testing/selftests/tc-testing/TODO.txt
@@ -5,6 +5,27 @@ tc Testing Suite To-Do list:
5 5
6- Add support for multiple versions of tc to run successively 6- Add support for multiple versions of tc to run successively
7 7
8- Improve error messages when tdc aborts its run 8- Improve error messages when tdc aborts its run. Partially done - still
9 need to better handle problems in pre- and post-suite.
9 10
10- Allow tdc to write its results to file 11- Use python logger module for debug/verbose output
12
13- Allow tdc to write its results to file.
14 Maybe use python logger module for this too.
15
16- A better implementation of the "hooks". Currently, every plugin
17 will attempt to run a function at every hook point. Could be
18 changed so that plugin __init__ methods will register functions to
19 be run in the various predefined times. Then if a plugin does not
20 require action at a specific point, no penalty will be paid for
21 trying to run a function that will do nothing.
22
23- Proper exception handling - make an exception class and use it
24
25- a TestCase class, for easier testcase handling, searching, comparison
26
27- a TestSuite class
28 and a way to configure a test suite,
29 to automate running multiple "test suites" with different requirements
30
31- super simple test case example using ls, touch, etc
diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py
new file mode 100644
index 000000000000..3ee9a6dacb52
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/TdcPlugin.py
@@ -0,0 +1,74 @@
1#!/usr/bin/env python3
2
3class TdcPlugin:
4 def __init__(self):
5 super().__init__()
6 print(' -- {}.__init__'.format(self.sub_class))
7
8 def pre_suite(self, testcount, testidlist):
9 '''run commands before test_runner goes into a test loop'''
10 self.testcount = testcount
11 self.testidlist = testidlist
12 if self.args.verbose > 1:
13 print(' -- {}.pre_suite'.format(self.sub_class))
14
15 def post_suite(self, index):
16 '''run commands after test_runner completes the test loop
17 index is the last ordinal number of test that was attempted'''
18 if self.args.verbose > 1:
19 print(' -- {}.post_suite'.format(self.sub_class))
20
21 def pre_case(self, test_ordinal, testid):
22 '''run commands before test_runner does one test'''
23 if self.args.verbose > 1:
24 print(' -- {}.pre_case'.format(self.sub_class))
25 self.args.testid = testid
26 self.args.test_ordinal = test_ordinal
27
28 def post_case(self):
29 '''run commands after test_runner does one test'''
30 if self.args.verbose > 1:
31 print(' -- {}.post_case'.format(self.sub_class))
32
33 def pre_execute(self):
34 '''run command before test-runner does the execute step'''
35 if self.args.verbose > 1:
36 print(' -- {}.pre_execute'.format(self.sub_class))
37
38 def post_execute(self):
39 '''run command after test-runner does the execute step'''
40 if self.args.verbose > 1:
41 print(' -- {}.post_execute'.format(self.sub_class))
42
43 def adjust_command(self, stage, command):
44 '''adjust the command'''
45 if self.args.verbose > 1:
46 print(' -- {}.adjust_command {}'.format(self.sub_class, stage))
47
48 # if stage == 'pre':
49 # pass
50 # elif stage == 'setup':
51 # pass
52 # elif stage == 'execute':
53 # pass
54 # elif stage == 'verify':
55 # pass
56 # elif stage == 'teardown':
57 # pass
58 # elif stage == 'post':
59 # pass
60 # else:
61 # pass
62
63 return command
64
65 def add_args(self, parser):
66 '''Get the plugin args from the command line'''
67 self.argparser = parser
68 return self.argparser
69
70 def check_args(self, args, remaining):
71 '''Check that the args are set correctly'''
72 self.args = args
73 if self.args.verbose > 1:
74 print(' -- {}.check_args'.format(self.sub_class))
diff --git a/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
new file mode 100644
index 000000000000..c18f88d09360
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
@@ -0,0 +1,104 @@
1tdc - Adding plugins for tdc
2
3Author: Brenda J. Butler - bjb@mojatatu.com
4
5ADDING PLUGINS
6--------------
7
8A new plugin should be written in python as a class that inherits from TdcPlugin.
9There are some examples in plugin-lib.
10
11The plugin can be used to add functionality to the test framework,
12such as:
13
14- adding commands to be run before and/or after the test suite
15- adding commands to be run before and/or after the test cases
16- adding commands to be run before and/or after the execute phase of the test cases
17- ability to alter the command to be run in any phase:
18 pre (the pre-suite stage)
19    setup
20    command
21 verify
22 teardown
23 post (the post-suite stage)
24- ability to add to the command line args, and use them at run time
25
26
27The functions in the class should follow the following interfaces:
28
29 def __init__(self)
30 def pre_suite(self, testcount, testidlist) # see "PRE_SUITE" below
31 def post_suite(self, ordinal) # see "SKIPPING" below
32 def pre_case(self, test_ordinal, testid) # see "PRE_CASE" below
33 def post_case(self)
34 def pre_execute(self)
35 def post_execute(self)
36 def adjust_command(self, stage, command) # see "ADJUST" below
37 def add_args(self, parser) # see "ADD_ARGS" below
38 def check_args(self, args, remaining) # see "CHECK_ARGS" below
39
40
41PRE_SUITE
42
43This method takes a testcount (number of tests to be run) and
44testidlist (array of test ids for tests that will be run). This is
45useful for various things, including when an exception occurs and the
46rest of the tests must be skipped. The info is stored in the object,
47and the post_suite method can refer to it when dumping the "skipped"
48TAP output. The tdc.py script will do that for the test suite as
49defined in the test case, but if the plugin is being used to run extra
50tests on each test (eg, check for memory leaks on associated
51co-processes) then that other tap output can be generated in the
52post-suite method using this info passed in to the pre_suite method.
53
54
55SKIPPING
56
57The post_suite method will receive the ordinal number of the last
58test to be attempted. It can use this info when outputting
59the TAP output for the extra test cases.
60
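A sketch of what that can look like, assuming the ids were saved by the
base class pre_suite as self.testidlist:

    def post_suite(self, index):
        super().post_suite(index)
        # emit TAP "skip" lines for the tests that were never attempted
        for n, tid in enumerate(self.testidlist[index:], start=index + 1):
            print('ok {} - {} # SKIPPED'.format(n, tid))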
61
62PRE_CASE
63
64The pre_case method will receive the ordinal number of the test
65and the test id. Useful for outputting the extra test results.
66
67
68ADJUST
69
70The adjust_command method receives a string representing
71the execution stage and a string which is the actual command to be
72executed. The plugin can adjust the command, based on the stage of
73execution.
74
75The stages are represented by the following strings:
76
77 'pre'
78 'setup'
79 'command'
80 'verify'
81 'teardown'
82 'post'
83
84The adjust_command method must return the adjusted command so tdc
85can use it.
86
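A sketch of an adjust_command override (the timeout prefix is just a
hypothetical example of a last-minute adjustment):

    def adjust_command(self, stage, command):
        super().adjust_command(stage, command)
        if stage == 'command' and not isinstance(command, list):
            # bound the runtime of the command under test
            command = 'timeout 10 ' + command
        return command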
87
88ADD_ARGS
89
90The add_args method receives the argparser object and can add
91arguments to it. Care should be taken that the new arguments do not
92conflict with any from tdc.py or from other plugins that will be used
93concurrently.
94
95The add_args method should return the argparser object.
96
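For example (a hypothetical option; compare nsPlugin's add_args):

    def add_args(self, parser):
        super().add_args(parser)
        self.argparser_group = self.argparser.add_argument_group(
            'example', 'options for the example plugin')
        self.argparser_group.add_argument(
            '-W', '--wait', type=int, default=0,
            help='seconds to sleep before each execute stage')
        return self.argparser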
97
98CHECK_ARGS
99
100The check_args method is so that the plugin can do validation on
101the args, if needed. If there is a problem, an Exception should
102be raised, with a string that explains the problem.
103
104eg: raise Exception('plugin xxx, arg -y is wrong, fix it')
diff --git a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
index 00438331ba47..17b267dedbd9 100644
--- a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
+++ b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
@@ -12,14 +12,18 @@ template.json for the required JSON format for test cases.
12Include the 'id' field, but do not assign a value. Running tdc with the -i 12Include the 'id' field, but do not assign a value. Running tdc with the -i
13option will generate a unique ID for that test case. 13option will generate a unique ID for that test case.
14 14
15tdc will recursively search the 'tc' subdirectory for .json files. Any 15tdc will recursively search the 'tc-tests' subdirectory (or the
16test case files you create in these directories will automatically be included. 16directories named with the -D option) for .json files. Any test case
17If you wish to store your custom test cases elsewhere, be sure to run tdc 17files you create in these directories will automatically be included.
18with the -f argument and the path to your file. 18If you wish to store your custom test cases elsewhere, be sure to run
19tdc with the -f argument and the path to your file, or the -D argument
20and the path to your directory(ies).
19 21
20Be aware of required escape characters in the JSON data - particularly when 22Be aware of required escape characters in the JSON data - particularly
21defining the match pattern. Refer to the tctests.json file for examples when 23when defining the match pattern. Refer to the supplied json test files
22in doubt. 24for examples when in doubt. The match pattern is written in json, and
25will be used by python. So the match pattern will be a python regular
26expression, but should be written using json syntax.
23 27
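As a sketch of the layering, the pattern is read from the JSON file as
an ordinary string and then handed to Python's re module, so JSON
escaping applies on top of regex escaping (a literal backslash must be
written as \\ in the test file):

    import json, re
    pattern = json.loads('"qdisc pfifo 1: root.*refcnt [0-9]+"')
    assert re.search(pattern, "qdisc pfifo 1: root refcnt 2")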
24 28
25TEST CASE STRUCTURE 29TEST CASE STRUCTURE
@@ -69,7 +73,8 @@ SETUP/TEARDOWN ERRORS
69If an error is detected during the setup/teardown process, execution of the 73If an error is detected during the setup/teardown process, execution of the
70tests will immediately stop with an error message and the namespace in which 74tests will immediately stop with an error message and the namespace in which
71the tests are run will be destroyed. This is to prevent inaccurate results 75the tests are run will be destroyed. This is to prevent inaccurate results
72in the test cases. 76in the test cases. tdc will output a series of TAP results for the skipped
77tests.
73 78
74Repeated failures of the setup/teardown may indicate a problem with the test 79Repeated failures of the setup/teardown may indicate a problem with the test
75case, or possibly even a bug in one of the commands that are not being tested. 80case, or possibly even a bug in one of the commands that are not being tested.
@@ -79,3 +84,17 @@ so that it doesn't halt the script for an error that doesn't matter. Turn the
79individual command into a list, with the command being first, followed by all 84individual command into a list, with the command being first, followed by all
80acceptable exit codes for the command. 85acceptable exit codes for the command.
81 86
87Example:
88
89A pair of setup commands. The first can have exit code 0, 1 or 255, the
90second must have exit code 0.
91
92 "setup": [
93 [
94 "$TC actions flush action gact",
95 0,
96 1,
97 255
98 ],
99 "$TC actions add action reclassify index 65536"
100 ],
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
new file mode 100644
index 000000000000..aa8a2669702b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
@@ -0,0 +1,27 @@
1tdc.py will look for plugins in a directory named plugins under the cwd.
2Make a set of numbered symbolic links from there to the actual plugins.
3Eg:
4
5tdc.py
6plugin-lib/
7plugins/
8 __init__.py
9 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
10 20-valgrindPlugin.py -> ../plugin-lib/valgrindPlugin.py
11 30-nsPlugin.py -> ../plugin-lib/nsPlugin.py
12
13
14tdc.py will find them and use them.
15
16
17rootPlugin
18 Check if the uid is root. If not, bail out.
19
20valgrindPlugin
21 Run the command under test with valgrind, and produce an extra set of TAP results for the memory tests.
22 This plugin will write files to the cwd, called vgnd-xxx.log. These will contain
23 the valgrind output for test xxx. Any file matching the glob 'vgnd-*.log' will be
24 deleted at the end of the run.
25
26nsPlugin
27 Run all the commands in a network namespace.
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
new file mode 100644
index 000000000000..a194b1af2b30
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -0,0 +1,141 @@
1import os
2import signal
3from string import Template
4import subprocess
5import time
6from TdcPlugin import TdcPlugin
7
8from tdc_config import *
9
10class SubPlugin(TdcPlugin):
11 def __init__(self):
12 self.sub_class = 'ns/SubPlugin'
13 super().__init__()
14
15 def pre_suite(self, testcount, testidlist):
16 '''run commands before test_runner goes into a test loop'''
17 super().pre_suite(testcount, testidlist)
18
19 if self.args.namespace:
20 self._ns_create()
21
22 def post_suite(self, index):
23 '''run commands after test_runner completes the test loop'''
24 super().post_suite(index)
25 if self.args.verbose:
26 print('{}.post_suite'.format(self.sub_class))
27
28 if self.args.namespace:
29 self._ns_destroy()
30
31 def add_args(self, parser):
32 super().add_args(parser)
33 self.argparser_group = self.argparser.add_argument_group(
34 'netns',
35 'options for nsPlugin(run commands in net namespace)')
36 self.argparser_group.add_argument(
37 '-n', '--namespace', action='store_true',
38 help='Run commands in namespace')
39 return self.argparser
40
41 def adjust_command(self, stage, command):
42 super().adjust_command(stage, command)
43 cmdform = 'list'
44 cmdlist = list()
45
46 if not self.args.namespace:
47 return command
48
49 if self.args.verbose:
50 print('{}.adjust_command'.format(self.sub_class))
51
52 if not isinstance(command, list):
53 cmdform = 'str'
54 cmdlist = command.split()
55 else:
56 cmdlist = command
57 if stage == 'setup' or stage == 'execute' or stage == 'verify' or stage == 'teardown':
58 if self.args.verbose:
59 print('adjust_command: stage is {}; inserting netns stuff in command [{}] list [{}]'.format(stage, command, cmdlist))
60 cmdlist.insert(0, self.args.NAMES['NS'])
61 cmdlist.insert(0, 'exec')
62 cmdlist.insert(0, 'netns')
63 cmdlist.insert(0, 'ip')
64 else:
65 pass
66
67 if cmdform == 'str':
68 command = ' '.join(cmdlist)
69 else:
70 command = cmdlist
71
72 if self.args.verbose:
73 print('adjust_command: return command [{}]'.format(command))
74 return command
75
76 def _ns_create(self):
77 '''
78 Create the network namespace in which the tests will be run and set up
79 the required network devices for it.
80 '''
81 if self.args.namespace:
82 cmd = 'ip netns add {}'.format(self.args.NAMES['NS'])
83 self._exec_cmd('pre', cmd)
84 cmd = 'ip link add $DEV0 type veth peer name $DEV1'
85 self._exec_cmd('pre', cmd)
86 cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
87 self._exec_cmd('pre', cmd)
88 cmd = 'ip link set $DEV0 up'
89 self._exec_cmd('pre', cmd)
90 cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
91 self._exec_cmd('pre', cmd)
92 if self.args.device:
93 cmd = 'ip link set $DEV2 netns {}'.format(self.args.NAMES['NS'])
94 self._exec_cmd('pre', cmd)
95 cmd = 'ip -n {} link set $DEV2 up'.format(self.args.NAMES['NS'])
96 self._exec_cmd('pre', cmd)
97
98 def _ns_destroy(self):
99 '''
100 Destroy the network namespace used for testing (and any associated
101 network devices)
102 '''
103 if self.args.namespace:
104 cmd = 'ip netns delete {}'.format(self.args.NAMES['NS'])
105 self._exec_cmd('post', cmd)
106
107 def _exec_cmd(self, stage, command):
108 '''
109 Perform any required modifications on an executable command, then run
110 it in a subprocess and return the results.
111 '''
112 if '$' in command:
113 command = self._replace_keywords(command)
114
115 command = self.adjust_command(stage, command)
116 if self.args.verbose:
117 print('_exec_cmd: command "{}"'.format(command))
118 proc = subprocess.Popen(command,
119 shell=True,
120 stdout=subprocess.PIPE,
121 stderr=subprocess.PIPE,
122 env=ENVIR)
123 (rawout, serr) = proc.communicate()
124
125 if proc.returncode != 0 and len(serr) > 0:
126 foutput = serr.decode("utf-8")
127 else:
128 foutput = rawout.decode("utf-8")
129
130 proc.stdout.close()
131 proc.stderr.close()
132 return proc, foutput
133
134 def _replace_keywords(self, cmd):
135 """
136 For a given executable command, substitute any known
137 variables contained within NAMES with the correct values
138 """
139 tcmd = Template(cmd)
140 subcmd = tcmd.safe_substitute(self.args.NAMES)
141 return subcmd
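
For illustration, a sketch of the rewrite adjust_command performs for the
setup/execute/verify/teardown stages above; the namespace name 'tcut' is an
assumed placeholder for NAMES['NS']:

    cmdlist = '$TC qdisc show dev $DEV1'.split()
    # prepend 'ip netns exec <NS>', matching the insert(0, ...) sequence above
    for tok in reversed(['ip', 'netns', 'exec', 'tcut']):
        cmdlist.insert(0, tok)
    print(' '.join(cmdlist))  # ip netns exec tcut $TC qdisc show dev $DEV1
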
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
new file mode 100644
index 000000000000..e36775bd4d12
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
@@ -0,0 +1,19 @@
1import os
2import sys
3from TdcPlugin import TdcPlugin
4
5from tdc_config import *
6
7
8class SubPlugin(TdcPlugin):
9 def __init__(self):
10 self.sub_class = 'root/SubPlugin'
11 super().__init__()
12
13 def pre_suite(self, testcount, testidlist):
14 # run commands before test_runner goes into a test loop
15 super().pre_suite(testcount, testidlist)
16
17 if os.geteuid():
18 print('This script must be run with root privileges', file=sys.stderr)
19 exit(1)
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
new file mode 100644
index 000000000000..477a7bd7d7fb
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
@@ -0,0 +1,142 @@
1'''
2Run the command under test under valgrind, and collect memory-leak info
3as a separate test.
4'''
5
6
7import os
8import re
9import signal
10from string import Template
11import subprocess
12import time
13from TdcPlugin import TdcPlugin
14
15from tdc_config import *
16
17def vp_extract_num_from_string(num_as_string_maybe_with_commas):
18 return int(num_as_string_maybe_with_commas.replace(',',''))
19
20class SubPlugin(TdcPlugin):
21 def __init__(self):
22 self.sub_class = 'valgrind/SubPlugin'
23 self.tap = ''
24 super().__init__()
25
26 def pre_suite(self, testcount, testidlist):
27 '''run commands before test_runner goes into a test loop'''
28 super().pre_suite(testcount, testidlist)
29 if self.args.verbose > 1:
30 print('{}.pre_suite'.format(self.sub_class))
31 if self.args.valgrind:
32 self._add_to_tap('1..{}\n'.format(self.testcount))
33
34 def post_suite(self, index):
35 '''run commands after test_runner exits the test loop'''
36 super().post_suite(index)
37 self._add_to_tap('\n|---\n')
38 if self.args.verbose > 1:
39 print('{}.post_suite'.format(self.sub_class))
40 print('{}'.format(self.tap))
41 if self.args.verbose < 4:
42 subprocess.check_output('rm -f vgnd-*.log', shell=True)
43
44 def add_args(self, parser):
45 super().add_args(parser)
46 self.argparser_group = self.argparser.add_argument_group(
47 'valgrind',
48 'options for valgrindPlugin (run command under test under Valgrind)')
49
50 self.argparser_group.add_argument(
51 '-V', '--valgrind', action='store_true',
52 help='Run commands under valgrind')
53
54 return self.argparser
55
56 def adjust_command(self, stage, command):
57 super().adjust_command(stage, command)
58 cmdform = 'list'
59 cmdlist = list()
60
61 if not self.args.valgrind:
62 return command
63
64 if self.args.verbose > 1:
65 print('{}.adjust_command'.format(self.sub_class))
66
67 if not isinstance(command, list):
68 cmdform = 'str'
69 cmdlist = command.split()
70 else:
71 cmdlist = command
72
73 if stage == 'execute':
74 if self.args.verbose > 1:
75 print('adjust_command: stage is {}; inserting valgrind stuff in command [{}] list [{}]'.
76 format(stage, command, cmdlist))
77 cmdlist.insert(0, '--track-origins=yes')
78 cmdlist.insert(0, '--show-leak-kinds=definite,indirect')
79 cmdlist.insert(0, '--leak-check=full')
80 cmdlist.insert(0, '--log-file=vgnd-{}.log'.format(self.args.testid))
81 cmdlist.insert(0, '-v') # ask for summary of non-leak errors
82 cmdlist.insert(0, ENVIR['VALGRIND_BIN'])
85
86 if cmdform == 'str':
87 command = ' '.join(cmdlist)
88 else:
89 command = cmdlist
90
91 if self.args.verbose > 1:
92 print('adjust_command: return command [{}]'.format(command))
93 return command
94
95 def post_execute(self):
96 if not self.args.valgrind:
97 return
98
99 self.definitely_lost_re = re.compile(
100 r'definitely lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
101 self.indirectly_lost_re = re.compile(
102 r'indirectly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
103 self.possibly_lost_re = re.compile(
104 r'possibly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
105 self.non_leak_error_re = re.compile(
106 r'ERROR SUMMARY:\s+([,0-9]+) errors from\s+([,0-9]+)\s+contexts', re.MULTILINE | re.DOTALL)
107
108 def_num = 0
109 ind_num = 0
110 pos_num = 0
111 nle_num = 0
112
113 # what about concurrent test runs? Maybe force them to be in different directories?
114 with open('vgnd-{}.log'.format(self.args.testid)) as vfd:
115 content = vfd.read()
116 def_mo = self.definitely_lost_re.search(content)
117 ind_mo = self.indirectly_lost_re.search(content)
118 pos_mo = self.possibly_lost_re.search(content)
119 nle_mo = self.non_leak_error_re.search(content)
120
121 if def_mo:
122 def_num = int(def_mo.group(2))
123 if ind_mo:
124 ind_num = int(ind_mo.group(2))
125 if pos_mo:
126 pos_num = int(pos_mo.group(2))
127 if nle_mo:
128 nle_num = int(nle_mo.group(1))
129
130 mem_results = ''
131 if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0):
132 mem_results += 'not '
133
134 mem_results += 'ok {} - {}-mem # {}\n'.format(
135 self.args.test_ordinal, self.args.testid, 'memory leak check')
136 self._add_to_tap(mem_results)
137 if mem_results.startswith('not '):
138 print('{}'.format(content))
139 self._add_to_tap(content)
140
141 def _add_to_tap(self, more_tap_output):
142 self.tap += more_tap_output
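
A sketch of the extraction performed in post_execute(), applied to an
illustrative valgrind summary line (the comma handling is what
vp_extract_num_from_string() does):

    import re

    line = '==1234==    definitely lost: 1,024 bytes in 2 blocks'
    mo = re.search(r'definitely lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', line)
    if mo:
        nbytes = int(mo.group(1).replace(',', ''))  # strip thousands separators
        nblocks = int(mo.group(2))
        print(nbytes, nblocks)  # 1024 2
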
diff --git a/tools/testing/selftests/tc-testing/plugins/__init__.py b/tools/testing/selftests/tc-testing/plugins/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugins/__init__.py
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index fc373fdf2bdc..b3754b9aa302 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -11,16 +11,88 @@ import re
11import os 11import os
12import sys 12import sys
13import argparse 13import argparse
14import importlib
14import json 15import json
15import subprocess 16import subprocess
17import time
16from collections import OrderedDict 18from collections import OrderedDict
17from string import Template 19from string import Template
18 20
19from tdc_config import * 21from tdc_config import *
20from tdc_helper import * 22from tdc_helper import *
21 23
22 24import TdcPlugin
23USE_NS = True 25
26class PluginMgr:
27 def __init__(self, argparser):
28 super().__init__()
29 self.plugins = {}
30 self.plugin_instances = []
31 self.args = []
32 self.argparser = argparser
33
34 # TODO: put plugins in order
35 plugindir = os.getenv('TDC_PLUGIN_DIR', './plugins')
36 for dirpath, dirnames, filenames in os.walk(plugindir):
37 for fn in filenames:
38 if (fn.endswith('.py') and
39 not fn == '__init__.py' and
40 not fn.startswith('#') and
41 not fn.startswith('.#')):
42 mn = fn[0:-3]
43 mod = importlib.import_module('plugins.' + mn)
44 self.plugins[mn] = mod
45 self.plugin_instances.append(mod.SubPlugin())
46
47 def call_pre_suite(self, testcount, testidlist):
48 for pgn_inst in self.plugin_instances:
49 pgn_inst.pre_suite(testcount, testidlist)
50
51 def call_post_suite(self, index):
52 for pgn_inst in reversed(self.plugin_instances):
53 pgn_inst.post_suite(index)
54
55 def call_pre_case(self, test_ordinal, testid):
56 for pgn_inst in self.plugin_instances:
57 try:
58 pgn_inst.pre_case(test_ordinal, testid)
59 except Exception as ee:
60 print('exception {} in call to pre_case for {} plugin'.
61 format(ee, pgn_inst.__class__))
62 print('test_ordinal is {}'.format(test_ordinal))
63 print('testid is {}'.format(testid))
64 raise
65
66 def call_post_case(self):
67 for pgn_inst in reversed(self.plugin_instances):
68 pgn_inst.post_case()
69
70 def call_pre_execute(self):
71 for pgn_inst in self.plugin_instances:
72 pgn_inst.pre_execute()
73
74 def call_post_execute(self):
75 for pgn_inst in reversed(self.plugin_instances):
76 pgn_inst.post_execute()
77
78 def call_add_args(self, parser):
79 for pgn_inst in self.plugin_instances:
80 parser = pgn_inst.add_args(parser)
81 return parser
82
83 def call_check_args(self, args, remaining):
84 for pgn_inst in self.plugin_instances:
85 pgn_inst.check_args(args, remaining)
86
87 def call_adjust_command(self, stage, command):
88 for pgn_inst in self.plugin_instances:
89 command = pgn_inst.adjust_command(stage, command)
90 return command
91
92 @staticmethod
93 def _make_argparser(args):
94 return argparse.ArgumentParser(
95 description='Linux TC unit tests')
24 96
25 97
26def replace_keywords(cmd): 98def replace_keywords(cmd):
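
A sketch of the hook ordering PluginMgr enforces above: pre_* hooks run in
plugin load order, post_* hooks in reverse order (plugin names illustrative):

    plugins = ['rootPlugin', 'valgrindPlugin', 'nsPlugin']
    for p in plugins:                 # pre_case, in load order
        print('{}.pre_case'.format(p))
    # ... setup / execute / verify / teardown ...
    for p in reversed(plugins):       # post_case, in reverse order
        print('{}.post_case'.format(p))
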
@@ -33,21 +105,24 @@ def replace_keywords(cmd):
33 return subcmd 105 return subcmd
34 106
35 107
36def exec_cmd(command, nsonly=True): 108def exec_cmd(args, pm, stage, command):
37 """ 109 """
38 Perform any required modifications on an executable command, then run 110 Perform any required modifications on an executable command, then run
39 it in a subprocess and return the results. 111 it in a subprocess and return the results.
40 """ 112 """
41 if (USE_NS and nsonly): 113 if len(command.strip()) == 0:
42 command = 'ip netns exec $NS ' + command 114 return None, None
43
44 if '$' in command: 115 if '$' in command:
45 command = replace_keywords(command) 116 command = replace_keywords(command)
46 117
118 command = pm.call_adjust_command(stage, command)
119 if args.verbose > 0:
120 print('command "{}"'.format(command))
47 proc = subprocess.Popen(command, 121 proc = subprocess.Popen(command,
48 shell=True, 122 shell=True,
49 stdout=subprocess.PIPE, 123 stdout=subprocess.PIPE,
50 stderr=subprocess.PIPE) 124 stderr=subprocess.PIPE,
125 env=ENVIR)
51 (rawout, serr) = proc.communicate() 126 (rawout, serr) = proc.communicate()
52 127
53 if proc.returncode != 0 and len(serr) > 0: 128 if proc.returncode != 0 and len(serr) > 0:
@@ -60,36 +135,85 @@ def exec_cmd(command, nsonly=True):
60 return proc, foutput 135 return proc, foutput
61 136
62 137
63def prepare_env(cmdlist): 138def prepare_env(args, pm, stage, prefix, cmdlist):
64 """ 139 """
65 Execute the setup/teardown commands for a test case. Optionally 140 Execute the setup/teardown commands for a test case.
66 terminate test execution if the command fails. 141 Optionally terminate test execution if the command fails.
67 """ 142 """
143 if args.verbose > 0:
144 print('{}'.format(prefix))
68 for cmdinfo in cmdlist: 145 for cmdinfo in cmdlist:
69 if (type(cmdinfo) == list): 146 if isinstance(cmdinfo, list):
70 exit_codes = cmdinfo[1:] 147 exit_codes = cmdinfo[1:]
71 cmd = cmdinfo[0] 148 cmd = cmdinfo[0]
72 else: 149 else:
73 exit_codes = [0] 150 exit_codes = [0]
74 cmd = cmdinfo 151 cmd = cmdinfo
75 152
76 if (len(cmd) == 0): 153 if not cmd:
77 continue 154 continue
78 155
79 (proc, foutput) = exec_cmd(cmd) 156 (proc, foutput) = exec_cmd(args, pm, stage, cmd)
157
158 if proc and (proc.returncode not in exit_codes):
159 print('', file=sys.stderr)
160 print("{} *** Could not execute: \"{}\"".format(prefix, cmd),
161 file=sys.stderr)
162 print("\n{} *** Error message: \"{}\"".format(prefix, foutput),
163 file=sys.stderr)
164 print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
165 print("\n\n{} *** stdout ***".format(prefix), file=sys.stderr)
166 print("\n\n{} *** stderr ***".format(prefix), file=sys.stderr)
167 raise Exception('"{}" did not complete successfully'.format(prefix))
168
169def run_one_test(pm, args, index, tidx):
170 result = True
171 tresult = ""
172 tap = ""
173 if args.verbose > 0:
174 print("\t====================\n=====> ", end="")
175 print("Test " + tidx["id"] + ": " + tidx["name"])
176
177 pm.call_pre_case(index, tidx['id'])
178 prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"])
179
180 if (args.verbose > 0):
181 print('-----> execute stage')
182 pm.call_pre_execute()
183 (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"])
184 exit_code = p.returncode
185 pm.call_post_execute()
186
187 if (exit_code != int(tidx["expExitCode"])):
188 result = False
189 print("exit:", exit_code, int(tidx["expExitCode"]))
190 print(procout)
191 else:
192 if args.verbose > 0:
193 print('-----> verify stage')
194 match_pattern = re.compile(
195 str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
196 (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
197 match_index = re.findall(match_pattern, procout)
198 if len(match_index) != int(tidx["matchCount"]):
199 result = False
200
201 if not result:
202 tresult += 'not '
203 tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name'])
204 tap += tresult
205
206 if not result:
207 tap += procout
80 208
81 if proc.returncode not in exit_codes: 209 prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'])
82 print 210 pm.call_post_case()
83 print("Could not execute:")
84 print(cmd)
85 print("\nError message:")
86 print(foutput)
87 print("\nAborting test run.")
88 ns_destroy()
89 exit(1)
90 211
212 index += 1
213
214 return tap
91 215
92def test_runner(filtered_tests, args): 216def test_runner(pm, args, filtered_tests):
93 """ 217 """
94 Driver function for the unit tests. 218 Driver function for the unit tests.
95 219
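
A sketch of the TAP line run_one_test() builds above ('not ' is prepended on
failure; the values are illustrative):

    result, index, testid, name = False, 1, 'e9a3', 'Add gact action'
    tresult = '' if result else 'not '
    tresult += 'ok {} - {} # {}\n'.format(index, testid, name)
    print(tresult)  # not ok 1 - e9a3 # Add gact action
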
@@ -102,74 +226,39 @@ def test_runner(filtered_tests, args):
102 tcount = len(testlist) 226 tcount = len(testlist)
103 index = 1 227 index = 1
104 tap = str(index) + ".." + str(tcount) + "\n" 228 tap = str(index) + ".." + str(tcount) + "\n"
229 badtest = None
105 230
231 pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
232
233 if args.verbose > 1:
234 print('Run tests here')
106 for tidx in testlist: 235 for tidx in testlist:
107 result = True
108 tresult = ""
109 if "flower" in tidx["category"] and args.device == None: 236 if "flower" in tidx["category"] and args.device == None:
110 continue 237 continue
111 print("Test " + tidx["id"] + ": " + tidx["name"]) 238 try:
112 prepare_env(tidx["setup"]) 239 badtest = tidx # in case it goes bad
113 (p, procout) = exec_cmd(tidx["cmdUnderTest"]) 240 tap += run_one_test(pm, args, index, tidx)
114 exit_code = p.returncode 241 except Exception as ee:
115 242 print('Exception {} (caught in test_runner, running test {} {} {})'.
116 if (exit_code != int(tidx["expExitCode"])): 243 format(ee, index, tidx['id'], tidx['name']))
117 result = False 244 break
118 print("exit:", exit_code, int(tidx["expExitCode"]))
119 print(procout)
120 else:
121 match_pattern = re.compile(str(tidx["matchPattern"]), re.DOTALL)
122 (p, procout) = exec_cmd(tidx["verifyCmd"])
123 match_index = re.findall(match_pattern, procout)
124 if len(match_index) != int(tidx["matchCount"]):
125 result = False
126
127 if result == True:
128 tresult += "ok "
129 else:
130 tresult += "not ok "
131 tap += tresult + str(index) + " " + tidx["id"] + " " + tidx["name"] + "\n"
132
133 if result == False:
134 tap += procout
135
136 prepare_env(tidx["teardown"])
137 index += 1 245 index += 1
138 246
139 return tap 247 # if we failed in setup or teardown,
140 248 # fill in the remaining tests with not ok
249 count = index
250 tap += '# about to flush the tap output if tests need to be skipped\n'
251 if tcount + 1 != index:
252 for tidx in testlist[index - 1:]:
253 msg = 'skipped - previous setup or teardown failed'
254 tap += 'ok {} - {} # {} {} {}\n'.format(
255 count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
256 count += 1
141 257
142def ns_create(): 258 tap += '# done flushing skipped test tap output\n'
143 """ 259 pm.call_post_suite(index)
144 Create the network namespace in which the tests will be run and set up
145 the required network devices for it.
146 """
147 if (USE_NS):
148 cmd = 'ip netns add $NS'
149 exec_cmd(cmd, False)
150 cmd = 'ip link add $DEV0 type veth peer name $DEV1'
151 exec_cmd(cmd, False)
152 cmd = 'ip link set $DEV1 netns $NS'
153 exec_cmd(cmd, False)
154 cmd = 'ip link set $DEV0 up'
155 exec_cmd(cmd, False)
156 cmd = 'ip -n $NS link set $DEV1 up'
157 exec_cmd(cmd, False)
158 cmd = 'ip link set $DEV2 netns $NS'
159 exec_cmd(cmd, False)
160 cmd = 'ip -n $NS link set $DEV2 up'
161 exec_cmd(cmd, False)
162
163
164def ns_destroy():
165 """
166 Destroy the network namespace for testing (and any associated network
167 devices as well)
168 """
169 if (USE_NS):
170 cmd = 'ip netns delete $NS'
171 exec_cmd(cmd, False)
172 260
261 return tap
173 262
174def has_blank_ids(idlist): 263def has_blank_ids(idlist):
175 """ 264 """
@@ -209,29 +298,50 @@ def set_args(parser):
209 """ 298 """
210 Set the command line arguments for tdc. 299 Set the command line arguments for tdc.
211 """ 300 """
212 parser.add_argument('-p', '--path', type=str, 301 parser.add_argument(
213 help='The full path to the tc executable to use') 302 '-p', '--path', type=str,
214 parser.add_argument('-c', '--category', type=str, nargs='?', const='+c', 303 help='The full path to the tc executable to use')
215 help='Run tests only from the specified category, or if no category is specified, list known categories.') 304 sg = parser.add_argument_group(
216 parser.add_argument('-f', '--file', type=str, 305 'selection', 'select which test cases: ' +
217 help='Run tests from the specified file') 306 'files plus directories; filtered by categories plus testids')
218 parser.add_argument('-l', '--list', type=str, nargs='?', const="++", metavar='CATEGORY', 307 ag = parser.add_argument_group(
219 help='List all test cases, or those only within the specified category') 308 'action', 'select action to perform on selected test cases')
220 parser.add_argument('-s', '--show', type=str, nargs=1, metavar='ID', dest='showID', 309
221 help='Display the test case with specified id') 310 sg.add_argument(
222 parser.add_argument('-e', '--execute', type=str, nargs=1, metavar='ID', 311 '-D', '--directory', nargs='+', metavar='DIR',
223 help='Execute the single test case with specified ID') 312 help='Collect tests from the specified directory(ies) ' +
224 parser.add_argument('-i', '--id', action='store_true', dest='gen_id', 313 '(default [tc-tests])')
225 help='Generate ID numbers for new test cases') 314 sg.add_argument(
315 '-f', '--file', nargs='+', metavar='FILE',
316 help='Run tests from the specified file(s)')
317 sg.add_argument(
318 '-c', '--category', nargs='*', metavar='CATG', default=['+c'],
319 help='Run tests only from the specified categories, ' +
320 'or if none are specified, list known categories.')
321 sg.add_argument(
322 '-e', '--execute', nargs='+', metavar='ID',
323 help='Execute the specified test cases with specified IDs')
324 ag.add_argument(
325 '-l', '--list', action='store_true',
326 help='List all test cases, or those only within the specified category')
327 ag.add_argument(
328 '-s', '--show', action='store_true', dest='showID',
329 help='Display the selected test cases')
330 ag.add_argument(
331 '-i', '--id', action='store_true', dest='gen_id',
332 help='Generate ID numbers for new test cases')
333 parser.add_argument(
334 '-v', '--verbose', action='count', default=0,
335 help='Show the commands that are being run')
226 parser.add_argument('-d', '--device', 336 parser.add_argument('-d', '--device',
227 help='Execute the test case in flower category') 337 help='Execute the test case in flower category')
228 return parser 338 return parser
229 339
230 340
231def check_default_settings(args): 341def check_default_settings(args, remaining, pm):
232 """ 342 """
233 Process any arguments overriding the default settings, and ensure the 343 Process any arguments overriding the default settings,
234 settings are correct. 344 and ensure the settings are correct.
235 """ 345 """
236 # Allow for overriding specific settings 346 # Allow for overriding specific settings
237 global NAMES 347 global NAMES
@@ -244,6 +354,8 @@ def check_default_settings(args):
244 print("The specified tc path " + NAMES['TC'] + " does not exist.") 354 print("The specified tc path " + NAMES['TC'] + " does not exist.")
245 exit(1) 355 exit(1)
246 356
357 pm.call_check_args(args, remaining)
358
247 359
248def get_id_list(alltests): 360def get_id_list(alltests):
249 """ 361 """
@@ -300,40 +412,107 @@ def generate_case_ids(alltests):
300 json.dump(testlist, outfile, indent=4) 412 json.dump(testlist, outfile, indent=4)
301 outfile.close() 413 outfile.close()
302 414
415def filter_tests_by_id(args, testlist):
416 '''
417 Remove tests from testlist that are not in the named id list.
418 If id list is empty, return empty list.
419 '''
420 newlist = list()
421 if testlist and args.execute:
422 target_ids = args.execute
423
424 if isinstance(target_ids, list) and (len(target_ids) > 0):
425 newlist = list(filter(lambda x: x['id'] in target_ids, testlist))
426 return newlist
427
428def filter_tests_by_category(args, testlist):
429 '''
430 Remove tests from testlist that are not in a named category.
431 '''
432 answer = list()
433 if args.category and testlist:
434 test_ids = list()
435 for catg in set(args.category):
436 if catg == '+c':
437 continue
438 print('considering category {}'.format(catg))
439 for tc in testlist:
440 if catg in tc['category'] and tc['id'] not in test_ids:
441 answer.append(tc)
442 test_ids.append(tc['id'])
443
444 return answer
303 445
304def get_test_cases(args): 446def get_test_cases(args):
305 """ 447 """
306 If a test case file is specified, retrieve tests from that file. 448 If a test case file is specified, retrieve tests from that file.
307 Otherwise, glob for all json files in subdirectories and load from 449 Otherwise, glob for all json files in subdirectories and load from
308 each one. 450 each one.
451 Also, if requested, filter by category, and add tests matching
452 certain ids.
309 """ 453 """
310 import fnmatch 454 import fnmatch
311 if args.file != None: 455
312 if not os.path.isfile(args.file): 456 flist = []
313 print("The specified test case file " + args.file + " does not exist.") 457 testdirs = ['tc-tests']
314 exit(1) 458
315 flist = [args.file] 459 if args.file:
316 else: 460 # at least one file was specified - remove the default directory
317 flist = [] 461 testdirs = []
318 for root, dirnames, filenames in os.walk('tc-tests'): 462
463 for ff in args.file:
464 if not os.path.isfile(ff):
465 print("IGNORING file " + ff + "\n\tBECAUSE does not exist.")
466 else:
467 flist.append(os.path.abspath(ff))
468
469 if args.directory:
470 testdirs = args.directory
471
472 for testdir in testdirs:
473 for root, dirnames, filenames in os.walk(testdir):
319 for filename in fnmatch.filter(filenames, '*.json'): 474 for filename in fnmatch.filter(filenames, '*.json'):
320 flist.append(os.path.join(root, filename)) 475 candidate = os.path.abspath(os.path.join(root, filename))
321 alltests = list() 476 if candidate not in flist:
477 flist.append(candidate)
478
479 alltestcases = list()
322 for casefile in flist: 480 for casefile in flist:
323 alltests = alltests + (load_from_file(casefile)) 481 alltestcases = alltestcases + (load_from_file(casefile))
324 return alltests 482
483 allcatlist = get_test_categories(alltestcases)
484 allidlist = get_id_list(alltestcases)
485
486 testcases_by_cats = get_categorized_testlist(alltestcases, allcatlist)
487 idtestcases = filter_tests_by_id(args, alltestcases)
488 cattestcases = filter_tests_by_category(args, alltestcases)
489
490 cat_ids = [x['id'] for x in cattestcases]
491 if args.execute:
492 if args.category:
493 alltestcases = cattestcases + [x for x in idtestcases if x['id'] not in cat_ids]
494 else:
495 alltestcases = idtestcases
496 else:
497 if cat_ids:
498 alltestcases = cattestcases
499 else:
500 # just accept the existing value of alltestcases,
501 # which has been filtered by file/directory
502 pass
503
504 return allcatlist, allidlist, testcases_by_cats, alltestcases
325 505
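
A sketch of how the -e and -c selections combine in get_test_cases() above:
category matches first, plus any explicitly requested ids not already
included (the test dicts are illustrative):

    idtestcases = [{'id': 'e9a3'}]                   # matched via -e
    cattestcases = [{'id': '201a'}, {'id': 'e9a3'}]  # matched via -c
    cat_ids = [x['id'] for x in cattestcases]
    selected = cattestcases + [x for x in idtestcases if x['id'] not in cat_ids]
    print([x['id'] for x in selected])  # ['201a', 'e9a3']
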
326 506
327def set_operation_mode(args): 507def set_operation_mode(pm, args):
328 """ 508 """
329 Load the test case data and process remaining arguments to determine 509 Load the test case data and process remaining arguments to determine
330 what the script should do for this run, and call the appropriate 510 what the script should do for this run, and call the appropriate
331 function. 511 function.
332 """ 512 """
333 alltests = get_test_cases(args) 513 ucat, idlist, testcases, alltests = get_test_cases(args)
334 514
335 if args.gen_id: 515 if args.gen_id:
336 idlist = get_id_list(alltests)
337 if (has_blank_ids(idlist)): 516 if (has_blank_ids(idlist)):
338 alltests = generate_case_ids(alltests) 517 alltests = generate_case_ids(alltests)
339 else: 518 else:
@@ -347,70 +526,26 @@ def set_operation_mode(args):
347 print("Please correct them before continuing.") 526 print("Please correct them before continuing.")
348 exit(1) 527 exit(1)
349 528
350 ucat = get_test_categories(alltests)
351
352 if args.showID: 529 if args.showID:
353 show_test_case_by_id(alltests, args.showID[0]) 530 for atest in alltests:
531 print_test_case(atest)
354 exit(0) 532 exit(0)
355 533
356 if args.execute: 534 if isinstance(args.category, list) and (len(args.category) == 0):
357 target_id = args.execute[0] 535 print("Available categories:")
358 else: 536 print_sll(ucat)
359 target_id = "" 537 exit(0)
360
361 if args.category:
362 if (args.category == '+c'):
363 print("Available categories:")
364 print_sll(ucat)
365 exit(0)
366 else:
367 target_category = args.category
368 else:
369 target_category = ""
370
371
372 testcases = get_categorized_testlist(alltests, ucat)
373 538
374 if args.list: 539 if args.list:
375 if (args.list == "++"): 540 list_test_cases(alltests)
376 list_test_cases(alltests) 541 exit(0)
377 exit(0)
378 elif(len(args.list) > 0):
379 if (args.list not in ucat):
380 print("Unknown category " + args.list)
381 print("Available categories:")
382 print_sll(ucat)
383 exit(1)
384 list_test_cases(testcases[args.list])
385 exit(0)
386
387 if (os.geteuid() != 0):
388 print("This script must be run with root privileges.\n")
389 exit(1)
390
391 ns_create()
392
393 if (len(target_category) == 0):
394 if (len(target_id) > 0):
395 alltests = list(filter(lambda x: target_id in x['id'], alltests))
396 if (len(alltests) == 0):
397 print("Cannot find a test case with ID matching " + target_id)
398 exit(1)
399 catresults = test_runner(alltests, args)
400 print("All test results: " + "\n\n" + catresults)
401 elif (len(target_category) > 0):
402 if (target_category == "flower") and args.device == None:
403 print("Please specify a NIC device (-d) to run category flower")
404 exit(1)
405 if (target_category not in ucat):
406 print("Specified category is not present in this file.")
407 exit(1)
408 else:
409 catresults = test_runner(testcases[target_category], args)
410 print("Category " + target_category + "\n\n" + catresults)
411
412 ns_destroy()
413 543
544 if len(alltests):
545 catresults = test_runner(pm, args, alltests)
546 else:
547 catresults = 'No tests found\n'
548 print('All test results: \n\n{}'.format(catresults))
414 549
415def main(): 550def main():
416 """ 551 """
@@ -419,10 +554,15 @@ def main():
419 """ 554 """
420 parser = args_parse() 555 parser = args_parse()
421 parser = set_args(parser) 556 parser = set_args(parser)
557 pm = PluginMgr(parser)
558 parser = pm.call_add_args(parser)
422 (args, remaining) = parser.parse_known_args() 559 (args, remaining) = parser.parse_known_args()
423 check_default_settings(args) 560 args.NAMES = NAMES
561 check_default_settings(args, remaining, pm)
562 if args.verbose > 2:
563 print('args is {}'.format(args))
424 564
425 set_operation_mode(args) 565 set_operation_mode(pm, args)
426 566
427 exit(0) 567 exit(0)
428 568
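
Putting the pieces together, a minimal out-of-tree plugin has the SubPlugin
shape used above; dropped into plugins/ as, say, 40-myPlugin.py (hypothetical
name), it is picked up by PluginMgr:

    from TdcPlugin import TdcPlugin

    class SubPlugin(TdcPlugin):
        def __init__(self):
            self.sub_class = 'my/SubPlugin'
            super().__init__()

        def adjust_command(self, stage, command):
            super().adjust_command(stage, command)
            return command  # no-op: leave the command unchanged
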
diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py
index db381120a566..9f35c96c88a0 100644
--- a/tools/testing/selftests/tc-testing/tdc_helper.py
+++ b/tools/testing/selftests/tc-testing/tdc_helper.py
@@ -57,20 +57,11 @@ def print_sll(items):
57 57
58def print_test_case(tcase): 58def print_test_case(tcase):
59 """ Pretty-printing of a given test case. """ 59 """ Pretty-printing of a given test case. """
60 print('\n==============\nTest {}\t{}\n'.format(tcase['id'], tcase['name']))
60 for k in tcase.keys(): 61 for k in tcase.keys():
61 if (isinstance(tcase[k], list)): 62 if (isinstance(tcase[k], list)):
62 print(k + ":") 63 print(k + ":")
63 print_list(tcase[k]) 64 print_list(tcase[k])
64 else: 65 else:
65 print(k + ": " + tcase[k]) 66 if not ((k == 'id') or (k == 'name')):
66 67 print(k + ": " + str(tcase[k]))
67
68def show_test_case_by_id(testlist, caseID):
69 """ Find the specified test case to pretty-print. """
70 if not any(d.get('id', None) == caseID for d in testlist):
71 print("That ID does not exist.")
72 exit(1)
73 else:
74 print_test_case(next((d for d in testlist if d['id'] == caseID)))
75
76
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 10ca46df1449..d744991c0f4f 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -5,16 +5,26 @@ include ../lib.mk
5 5
6.PHONY: all all_32 all_64 warn_32bit_failure clean 6.PHONY: all all_32 all_64 warn_32bit_failure clean
7 7
8TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ 8UNAME_M := $(shell uname -m)
9 check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ 9CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
10CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
11
12TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
13 check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
10 protection_keys test_vdso test_vsyscall 14 protection_keys test_vdso test_vsyscall
11TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ 15TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
12 test_FCMOV test_FCOMI test_FISTTP \ 16 test_FCMOV test_FCOMI test_FISTTP \
13 vdso_restorer 17 vdso_restorer
14TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl 18TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
19# Some selftests require 32-bit support enabled even on 64-bit systems
20TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
15 21
16TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) 22TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED)
17TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) 23TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
24ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11)
25TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED)
26endif
27
18BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) 28BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
19BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) 29BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
20 30
@@ -23,10 +33,6 @@ BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
23 33
24CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie 34CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie
25 35
26UNAME_M := $(shell uname -m)
27CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
28CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
29
30define gen-target-rule-32 36define gen-target-rule-32
31$(1) $(1)_32: $(OUTPUT)/$(1)_32 37$(1) $(1)_32: $(OUTPUT)/$(1)_32
32.PHONY: $(1) $(1)_32 38.PHONY: $(1) $(1)_32
@@ -40,12 +46,14 @@ endef
40ifeq ($(CAN_BUILD_I386),1) 46ifeq ($(CAN_BUILD_I386),1)
41all: all_32 47all: all_32
42TEST_PROGS += $(BINARIES_32) 48TEST_PROGS += $(BINARIES_32)
49EXTRA_CFLAGS += -DCAN_BUILD_32
43$(foreach t,$(TARGETS_C_32BIT_ALL),$(eval $(call gen-target-rule-32,$(t)))) 50$(foreach t,$(TARGETS_C_32BIT_ALL),$(eval $(call gen-target-rule-32,$(t))))
44endif 51endif
45 52
46ifeq ($(CAN_BUILD_X86_64),1) 53ifeq ($(CAN_BUILD_X86_64),1)
47all: all_64 54all: all_64
48TEST_PROGS += $(BINARIES_64) 55TEST_PROGS += $(BINARIES_64)
56EXTRA_CFLAGS += -DCAN_BUILD_64
49$(foreach t,$(TARGETS_C_64BIT_ALL),$(eval $(call gen-target-rule-64,$(t)))) 57$(foreach t,$(TARGETS_C_64BIT_ALL),$(eval $(call gen-target-rule-64,$(t))))
50endif 58endif
51 59
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
index ec0f6b45ce8b..9c0325e1ea68 100644
--- a/tools/testing/selftests/x86/mpx-mini-test.c
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -315,11 +315,39 @@ static inline void *__si_bounds_upper(siginfo_t *si)
315 return si->si_upper; 315 return si->si_upper;
316} 316}
317#else 317#else
318
319/*
320 * This deals with old versions of _sigfault in some distros:
321 *
322
323old _sigfault:
324 struct {
325 void *si_addr;
326 } _sigfault;
327
328new _sigfault:
329 struct {
330 void __user *_addr;
331 int _trapno;
332 short _addr_lsb;
333 union {
334 struct {
335 void __user *_lower;
336 void __user *_upper;
337 } _addr_bnd;
338 __u32 _pkey;
339 };
340 } _sigfault;
341 *
342 */
343
318static inline void **__si_bounds_hack(siginfo_t *si) 344static inline void **__si_bounds_hack(siginfo_t *si)
319{ 345{
320 void *sigfault = &si->_sifields._sigfault; 346 void *sigfault = &si->_sifields._sigfault;
321 void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault); 347 void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
322 void **__si_lower = end_sigfault; 348 int *trapno = (int*)end_sigfault;
349 /* skip _trapno and _addr_lsb */
350 void **__si_lower = (void**)(trapno + 2);
323 351
324 return __si_lower; 352 return __si_lower;
325} 353}
@@ -331,7 +359,7 @@ static inline void *__si_bounds_lower(siginfo_t *si)
331 359
332static inline void *__si_bounds_upper(siginfo_t *si) 360static inline void *__si_bounds_upper(siginfo_t *si)
333{ 361{
334 return (*__si_bounds_hack(si)) + sizeof(void *); 362 return *(__si_bounds_hack(si) + 1);
335} 363}
336#endif 364#endif
337 365
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index bc1b0735bb50..f15aa5a76fe3 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -393,34 +393,6 @@ pid_t fork_lazy_child(void)
393 return forkret; 393 return forkret;
394} 394}
395 395
396void davecmp(void *_a, void *_b, int len)
397{
398 int i;
399 unsigned long *a = _a;
400 unsigned long *b = _b;
401
402 for (i = 0; i < len / sizeof(*a); i++) {
403 if (a[i] == b[i])
404 continue;
405
406 dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]);
407 }
408}
409
410void dumpit(char *f)
411{
412 int fd = open(f, O_RDONLY);
413 char buf[100];
414 int nr_read;
415
416 dprintf2("maps fd: %d\n", fd);
417 do {
418 nr_read = read(fd, &buf[0], sizeof(buf));
419 write(1, buf, nr_read);
420 } while (nr_read > 0);
421 close(fd);
422}
423
424#define PKEY_DISABLE_ACCESS 0x1 396#define PKEY_DISABLE_ACCESS 0x1
425#define PKEY_DISABLE_WRITE 0x2 397#define PKEY_DISABLE_WRITE 0x2
426 398
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index a48da95c18fd..ddfdd635de16 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -119,7 +119,9 @@ static void check_result(void)
119 119
120int main() 120int main()
121{ 121{
122#ifdef CAN_BUILD_32
122 int tmp; 123 int tmp;
124#endif
123 125
124 sethandler(SIGTRAP, sigtrap, 0); 126 sethandler(SIGTRAP, sigtrap, 0);
125 127
@@ -139,12 +141,13 @@ int main()
139 : : "c" (post_nop) : "r11"); 141 : : "c" (post_nop) : "r11");
140 check_result(); 142 check_result();
141#endif 143#endif
142 144#ifdef CAN_BUILD_32
143 printf("[RUN]\tSet TF and check int80\n"); 145 printf("[RUN]\tSet TF and check int80\n");
144 set_eflags(get_eflags() | X86_EFLAGS_TF); 146 set_eflags(get_eflags() | X86_EFLAGS_TF);
145 asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) 147 asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
146 : INT80_CLOBBERS); 148 : INT80_CLOBBERS);
147 check_result(); 149 check_result();
150#endif
148 151
149 /* 152 /*
150 * This test is particularly interesting if fast syscalls use 153 * This test is particularly interesting if fast syscalls use
diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
index bf0d687c7db7..64f11c8d9b76 100644
--- a/tools/testing/selftests/x86/test_mremap_vdso.c
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -90,8 +90,12 @@ int main(int argc, char **argv, char **envp)
90 vdso_size += PAGE_SIZE; 90 vdso_size += PAGE_SIZE;
91 } 91 }
92 92
93#ifdef __i386__
93 /* Glibc is likely to explode now - exit with raw syscall */ 94 /* Glibc is likely to explode now - exit with raw syscall */
94 asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret)); 95 asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret));
96#else /* __x86_64__ */
97 syscall(SYS_exit, ret);
98#endif
95 } else { 99 } else {
96 int status; 100 int status;
97 101
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 29973cde06d3..235259011704 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -26,20 +26,59 @@
26# endif 26# endif
27#endif 27#endif
28 28
29/* max length of lines in /proc/self/maps - anything longer is skipped here */
30#define MAPS_LINE_LEN 128
31
29int nerrs = 0; 32int nerrs = 0;
30 33
34typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
35
36getcpu_t vgetcpu;
37getcpu_t vdso_getcpu;
38
39static void *vsyscall_getcpu(void)
40{
31#ifdef __x86_64__ 41#ifdef __x86_64__
32# define VSYS(x) (x) 42 FILE *maps;
43 char line[MAPS_LINE_LEN];
44 bool found = false;
45
46 maps = fopen("/proc/self/maps", "r");
47 if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
48 return NULL;
49
50 while (fgets(line, MAPS_LINE_LEN, maps)) {
51 char r, x;
52 void *start, *end;
53 char name[MAPS_LINE_LEN];
54
55 /* sscanf() is safe here as sizeof(name) >= strlen(line) */
56 if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
57 &start, &end, &r, &x, name) != 5)
58 continue;
59
60 if (strcmp(name, "[vsyscall]"))
61 continue;
62
63 /* assume entries are OK, as we test vDSO here not vsyscall */
64 found = true;
65 break;
66 }
67
68 fclose(maps);
69
70 if (!found) {
71 printf("Warning: failed to find vsyscall getcpu\n");
72 return NULL;
73 }
74 return (void *) (0xffffffffff600800);
33#else 75#else
34# define VSYS(x) 0 76 return NULL;
35#endif 77#endif
78}
36 79
37typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
38
39const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
40getcpu_t vdso_getcpu;
41 80
42void fill_function_pointers() 81static void fill_function_pointers()
43{ 82{
44 void *vdso = dlopen("linux-vdso.so.1", 83 void *vdso = dlopen("linux-vdso.so.1",
45 RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); 84 RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
@@ -54,6 +93,8 @@ void fill_function_pointers()
54 vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); 93 vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
55 if (!vdso_getcpu) 94 if (!vdso_getcpu)
56 printf("Warning: failed to find getcpu in vDSO\n"); 95 printf("Warning: failed to find getcpu in vDSO\n");
96
97 vgetcpu = (getcpu_t) vsyscall_getcpu();
57} 98}
58 99
59static long sys_getcpu(unsigned * cpu, unsigned * node, 100static long sys_getcpu(unsigned * cpu, unsigned * node,
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 7a744fa7b786..be81621446f0 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -33,6 +33,9 @@
33# endif 33# endif
34#endif 34#endif
35 35
36/* max length of lines in /proc/self/maps - anything longer is skipped here */
37#define MAPS_LINE_LEN 128
38
36static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), 39static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
37 int flags) 40 int flags)
38{ 41{
@@ -98,7 +101,7 @@ static int init_vsys(void)
98#ifdef __x86_64__ 101#ifdef __x86_64__
99 int nerrs = 0; 102 int nerrs = 0;
100 FILE *maps; 103 FILE *maps;
101 char line[128]; 104 char line[MAPS_LINE_LEN];
102 bool found = false; 105 bool found = false;
103 106
104 maps = fopen("/proc/self/maps", "r"); 107 maps = fopen("/proc/self/maps", "r");
@@ -108,10 +111,12 @@ static int init_vsys(void)
108 return 0; 111 return 0;
109 } 112 }
110 113
111 while (fgets(line, sizeof(line), maps)) { 114 while (fgets(line, MAPS_LINE_LEN, maps)) {
112 char r, x; 115 char r, x;
113 void *start, *end; 116 void *start, *end;
114 char name[128]; 117 char name[MAPS_LINE_LEN];
118
119 /* sscanf() is safe here as strlen(name) >= strlen(line) */
115 if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", 120 if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
116 &start, &end, &r, &x, name) != 5) 121 &start, &end, &r, &x, name) != 5)
117 continue; 122 continue;