author    David S. Miller <davem@davemloft.net>  2017-12-22 11:16:31 -0500
committer David S. Miller <davem@davemloft.net>  2017-12-22 11:16:31 -0500
commit    fba961ab29e5ffb055592442808bb0f7962e05da (patch)
tree      5180c384b79399c469e0ed88211114e6ab249484
parent    0a80f0c26bf5a131892b91db5318eb67608006d2 (diff)
parent    ead68f216110170ec729e2c4dec0aad6d38259d7 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Lots of overlapping changes.  Also on the net-next side the XDP
state management is handled more in the generic layers so undo the
'net' nfp fix which isn't applicable in net-next.

Include a necessary change by Jakub Kicinski, with log message:

====================
cls_bpf no longer takes care of offload tracking.  Make sure
netdevsim performs necessary checks.  This fixes a warning caused
by TC trying to remove a filter it has not added.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
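The netdevsim change referenced above comes down to the driver tracking its own offload state now that cls_bpf no longer does so on its behalf. The sketch below is only an illustration of that idea with made-up names — it is not the code from drivers/net/netdevsim/bpf.c and does not use the real TC offload API: a device remembers which program it has offloaded and rejects a removal request for a program it never added, which is the condition that previously produced the warning.

```c
/*
 * Illustrative model only -- hypothetical names, not the actual netdevsim
 * or TC offload API.  The point: with cls_bpf no longer tracking offload
 * state, the driver itself must refuse to remove a program it never bound.
 */
#include <errno.h>
#include <stddef.h>

struct fake_offload_dev {
	const void *bound_prog;		/* program currently offloaded, or NULL */
};

static int fake_offload_add(struct fake_offload_dev *dev, const void *prog)
{
	if (dev->bound_prog)
		return -EBUSY;		/* this model offloads one program at a time */
	dev->bound_prog = prog;
	return 0;
}

static int fake_offload_destroy(struct fake_offload_dev *dev, const void *prog)
{
	if (dev->bound_prog != prog)
		return -ENOENT;		/* never offloaded here: reject instead of warn later */
	dev->bound_prog = NULL;
	return 0;
}
```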
-rw-r--r--  Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt | 2
-rw-r--r--  Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt | 18
-rw-r--r--  Makefile | 2
-rw-r--r--  arch/arm/lib/csumpartialcopyuser.S | 4
-rw-r--r--  arch/arm64/kvm/hyp/debug-sr.c | 3
-rw-r--r--  arch/parisc/boot/compressed/misc.c | 4
-rw-r--r--  arch/parisc/include/asm/thread_info.h | 5
-rw-r--r--  arch/parisc/kernel/entry.S | 12
-rw-r--r--  arch/parisc/kernel/hpmc.S | 1
-rw-r--r--  arch/parisc/kernel/unwind.c | 1
-rw-r--r--  arch/parisc/lib/delay.c | 2
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c | 6
-rw-r--r--  arch/s390/net/bpf_jit_comp.c | 11
-rw-r--r--  arch/sparc/mm/fault_32.c | 2
-rw-r--r--  arch/sparc/mm/fault_64.c | 2
-rw-r--r--  arch/sparc/net/bpf_jit_comp_64.c | 6
-rw-r--r--  arch/um/kernel/trap.c | 2
-rw-r--r--  arch/x86/entry/entry_32.S | 6
-rw-r--r--  arch/x86/entry/entry_64.S | 189
-rw-r--r--  arch/x86/entry/entry_64_compat.S | 7
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 2
-rw-r--r--  arch/x86/include/asm/desc.h | 11
-rw-r--r--  arch/x86/include/asm/fixmap.h | 68
-rw-r--r--  arch/x86/include/asm/hypervisor.h | 25
-rw-r--r--  arch/x86/include/asm/irqflags.h | 3
-rw-r--r--  arch/x86/include/asm/kdebug.h | 1
-rw-r--r--  arch/x86/include/asm/paravirt.h | 9
-rw-r--r--  arch/x86/include/asm/processor.h | 59
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 3
-rw-r--r--  arch/x86/include/asm/switch_to.h | 8
-rw-r--r--  arch/x86/include/asm/thread_info.h | 2
-rw-r--r--  arch/x86/include/asm/traps.h | 1
-rw-r--r--  arch/x86/include/asm/unwind.h | 7
-rw-r--r--  arch/x86/kernel/asm-offsets.c | 6
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 9
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c | 4
-rw-r--r--  arch/x86/kernel/cpu/common.c | 170
-rw-r--r--  arch/x86/kernel/doublefault.c | 36
-rw-r--r--  arch/x86/kernel/dumpstack.c | 74
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 6
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 6
-rw-r--r--  arch/x86/kernel/ioport.c | 2
-rw-r--r--  arch/x86/kernel/irq.c | 12
-rw-r--r--  arch/x86/kernel/irq_64.c | 4
-rw-r--r--  arch/x86/kernel/paravirt_patch_64.c | 2
-rw-r--r--  arch/x86/kernel/process.c | 19
-rw-r--r--  arch/x86/kernel/process_32.c | 2
-rw-r--r--  arch/x86/kernel/process_64.c | 14
-rw-r--r--  arch/x86/kernel/traps.c | 69
-rw-r--r--  arch/x86/kernel/unwind_orc.c | 88
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 9
-rw-r--r--  arch/x86/kvm/emulate.c | 32
-rw-r--r--  arch/x86/kvm/mmu.c | 8
-rw-r--r--  arch/x86/kvm/vmx.c | 2
-rw-r--r--  arch/x86/kvm/x86.c | 48
-rw-r--r--  arch/x86/lib/delay.c | 4
-rw-r--r--  arch/x86/mm/fault.c | 2
-rw-r--r--  arch/x86/mm/kasan_init_64.c | 18
-rw-r--r--  arch/x86/power/cpu.c | 16
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 2
-rw-r--r--  arch/x86/xen/mmu_pv.c | 2
-rw-r--r--  block/bio.c | 2
-rw-r--r--  block/blk-map.c | 38
-rw-r--r--  block/blk-throttle.c | 8
-rw-r--r--  block/bounce.c | 6
-rw-r--r--  block/kyber-iosched.c | 37
-rw-r--r--  drivers/acpi/apei/erst.c | 2
-rw-r--r--  drivers/acpi/cppc_acpi.c | 2
-rw-r--r--  drivers/block/null_blk.c | 4
-rw-r--r--  drivers/cpufreq/cpufreq_governor.c | 19
-rw-r--r--  drivers/cpufreq/imx6q-cpufreq.c | 11
-rw-r--r--  drivers/dma/at_hdmac.c | 4
-rw-r--r--  drivers/dma/dma-jz4740.c | 4
-rw-r--r--  drivers/dma/dmatest.c | 55
-rw-r--r--  drivers/dma/fsl-edma.c | 28
-rw-r--r--  drivers/dma/ioat/init.c | 2
-rw-r--r--  drivers/mfd/cros_ec_spi.c | 53
-rw-r--r--  drivers/mfd/twl4030-audio.c | 9
-rw-r--r--  drivers/mfd/twl6040.c | 12
-rw-r--r--  drivers/misc/pti.c | 2
-rw-r--r--  drivers/mtd/mtdcore.c | 2
-rw-r--r--  drivers/mtd/nand/brcmnand/brcmnand.c | 2
-rw-r--r--  drivers/mtd/nand/gpio.c | 6
-rw-r--r--  drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 6
-rw-r--r--  drivers/net/ethernet/arc/emac.h | 2
-rw-r--r--  drivers/net/ethernet/arc/emac_main.c | 164
-rw-r--r--  drivers/net/ethernet/broadcom/tg3.c | 4
-rw-r--r--  drivers/net/ethernet/marvell/mvneta.c | 8
-rw-r--r--  drivers/net/ethernet/mediatek/mtk_eth_soc.c | 11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en.h | 9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 63
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eq.c | 20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 16
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/health.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c | 75
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/qp.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/rl.c | 22
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 64
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/vxlan.h | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 15
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.c | 30
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.h | 8
-rw-r--r--  drivers/net/ethernet/qualcomm/emac/emac.c | 6
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/common.h | 2
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 5
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 3
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 2
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 6
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2
-rw-r--r--  drivers/net/netdevsim/bpf.c | 25
-rw-r--r--  drivers/net/phy/marvell.c | 15
-rw-r--r--  drivers/net/phy/mdio-xgene.c | 21
-rw-r--r--  drivers/net/vxlan.c | 19
-rw-r--r--  drivers/net/wireless/mac80211_hwsim.c | 3
-rw-r--r--  drivers/nvme/host/core.c | 11
-rw-r--r--  drivers/nvme/host/fc.c | 1
-rw-r--r--  drivers/parisc/lba_pci.c | 33
-rw-r--r--  drivers/pci/pci-driver.c | 7
-rw-r--r--  drivers/s390/net/qeth_core_main.c | 9
-rw-r--r--  drivers/scsi/aacraid/aacraid.h | 1
-rw-r--r--  drivers/scsi/aacraid/linit.c | 2
-rw-r--r--  drivers/scsi/osd/osd_initiator.c | 4
-rw-r--r--  drivers/scsi/scsi_devinfo.c | 6
-rw-r--r--  drivers/scsi/scsi_scan.c | 13
-rw-r--r--  drivers/scsi/scsi_sysfs.c | 5
-rw-r--r--  drivers/scsi/scsi_transport_spi.c | 12
-rw-r--r--  drivers/spi/spi-armada-3700.c | 8
-rw-r--r--  drivers/spi/spi-atmel.c | 2
-rw-r--r--  drivers/spi/spi-rspi.c | 4
-rw-r--r--  drivers/spi/spi-sun4i.c | 2
-rw-r--r--  drivers/spi/spi-xilinx.c | 11
-rw-r--r--  drivers/target/target_core_pscsi.c | 4
-rw-r--r--  fs/cramfs/Kconfig | 1
-rw-r--r--  fs/exec.c | 7
-rw-r--r--  fs/ext4/extents.c | 1
-rw-r--r--  fs/ext4/ialloc.c | 2
-rw-r--r--  fs/ext4/inode.c | 9
-rw-r--r--  fs/ext4/namei.c | 4
-rw-r--r--  fs/namespace.c | 1
-rw-r--r--  fs/super.c | 37
-rw-r--r--  include/kvm/arm_arch_timer.h | 2
-rw-r--r--  include/linux/bio.h | 2
-rw-r--r--  include/linux/blk_types.h | 9
-rw-r--r--  include/linux/blkdev.h | 25
-rw-r--r--  include/linux/bpf_verifier.h | 4
-rw-r--r--  include/linux/intel-pti.h (renamed from include/linux/pti.h) | 6
-rw-r--r--  include/linux/ipv6.h | 3
-rw-r--r--  include/linux/mfd/rtsx_pci.h | 2
-rw-r--r--  include/linux/mlx5/driver.h | 3
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h | 8
-rw-r--r--  include/linux/spi/spi.h | 2
-rw-r--r--  include/net/cfg80211.h | 1
-rw-r--r--  include/net/pkt_cls.h | 5
-rw-r--r--  include/trace/events/kvm.h | 7
-rw-r--r--  kernel/bpf/verifier.c | 283
-rw-r--r--  kernel/time/posix-timers.c | 29
-rw-r--r--  lib/test_bpf.c | 43
-rw-r--r--  mm/backing-dev.c | 5
-rw-r--r--  net/bridge/br_netlink.c | 11
-rw-r--r--  net/core/dev.c | 2
-rw-r--r--  net/core/net_namespace.c | 2
-rw-r--r--  net/core/skbuff.c | 7
-rw-r--r--  net/ipv4/fib_frontend.c | 9
-rw-r--r--  net/ipv4/fib_semantics.c | 8
-rw-r--r--  net/ipv4/ip_gre.c | 1
-rw-r--r--  net/ipv6/af_inet6.c | 1
-rw-r--r--  net/ipv6/ip6_gre.c | 1
-rw-r--r--  net/ipv6/ip6_output.c | 12
-rw-r--r--  net/ipv6/ip6_tunnel.c | 9
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 1
-rw-r--r--  net/ipv6/route.c | 19
-rw-r--r--  net/openvswitch/flow.c | 15
-rw-r--r--  net/sched/cls_bpf.c | 93
-rw-r--r--  net/sctp/debug.c | 3
-rw-r--r--  net/sctp/ulpqueue.c | 24
-rw-r--r--  net/tipc/group.c | 16
-rw-r--r--  net/wireless/Makefile | 31
-rw-r--r--  net/wireless/certs/sforshee.hex | 86
-rw-r--r--  net/wireless/certs/sforshee.x509 | bin 680 -> 0 bytes
-rw-r--r--  net/wireless/nl80211.c | 6
-rw-r--r--  sound/core/rawmidi.c | 15
-rw-r--r--  sound/pci/hda/patch_hdmi.c | 6
-rw-r--r--  sound/pci/hda/patch_realtek.c | 35
-rw-r--r--  sound/usb/mixer.c | 27
-rw-r--r--  sound/usb/quirks.c | 7
-rw-r--r--  tools/arch/s390/include/uapi/asm/bpf_perf_event.h | 2
-rwxr-xr-x  tools/kvm/kvm_stat/kvm_stat | 74
-rw-r--r--  tools/kvm/kvm_stat/kvm_stat.txt | 4
-rw-r--r--  tools/testing/selftests/bpf/Makefile | 2
-rwxr-xr-x  tools/testing/selftests/bpf/test_offload.py | 4
-rw-r--r--  tools/testing/selftests/bpf/test_progs.c | 8
-rw-r--r--  tools/testing/selftests/bpf/test_verifier.c | 629
-rw-r--r--  tools/testing/selftests/net/config | 1
-rw-r--r--  virt/kvm/arm/arch_timer.c | 40
-rw-r--r--  virt/kvm/arm/arm.c | 2
-rw-r--r--  virt/kvm/arm/mmio.c | 6
-rw-r--r--  virt/kvm/arm/mmu.c | 10
202 files changed, 2851 insertions, 1137 deletions
diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
index 376fa2f50e6b..956bb046e599 100644
--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
+++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
@@ -13,7 +13,6 @@ Required properties:
13 at25df321a 13 at25df321a
14 at25df641 14 at25df641
15 at26df081a 15 at26df081a
16 en25s64
17 mr25h128 16 mr25h128
18 mr25h256 17 mr25h256
19 mr25h10 18 mr25h10
@@ -33,7 +32,6 @@ Required properties:
33 s25fl008k 32 s25fl008k
34 s25fl064k 33 s25fl064k
35 sst25vf040b 34 sst25vf040b
36 sst25wf040b
37 m25p40 35 m25p40
38 m25p80 36 m25p80
39 m25p16 37 m25p16
diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
index 5bf13960f7f4..e3c48b20b1a6 100644
--- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
+++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
@@ -12,24 +12,30 @@ Required properties:
12 - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc 12 - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
13- reg : Offset and length of the register set for the device 13- reg : Offset and length of the register set for the device
14- interrupts : Should contain CSPI/eCSPI interrupt 14- interrupts : Should contain CSPI/eCSPI interrupt
15- cs-gpios : Specifies the gpio pins to be used for chipselects.
16- clocks : Clock specifiers for both ipg and per clocks. 15- clocks : Clock specifiers for both ipg and per clocks.
17- clock-names : Clock names should include both "ipg" and "per" 16- clock-names : Clock names should include both "ipg" and "per"
18See the clock consumer binding, 17See the clock consumer binding,
19 Documentation/devicetree/bindings/clock/clock-bindings.txt 18 Documentation/devicetree/bindings/clock/clock-bindings.txt
20- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
21 Documentation/devicetree/bindings/dma/dma.txt
22- dma-names: DMA request names should include "tx" and "rx" if present.
23 19
24Obsolete properties: 20Recommended properties:
25- fsl,spi-num-chipselects : Contains the number of the chipselect 21- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt. While the native chip
22select lines can be used, they appear to always generate a pulse between each
23word of a transfer. Most use cases will require GPIO based chip selects to
24generate a valid transaction.
26 25
27Optional properties: 26Optional properties:
27- num-cs : Number of total chip selects, see spi-bus.txt.
28- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
29Documentation/devicetree/bindings/dma/dma.txt.
30- dma-names: DMA request names, if present, should include "tx" and "rx".
28- fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register 31- fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
29controlling the SPI_READY handling. Note that to enable the DRCTL consideration, 32controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
30the SPI_READY mode-flag needs to be set too. 33the SPI_READY mode-flag needs to be set too.
31Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst). 34Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).
32 35
36Obsolete properties:
37- fsl,spi-num-chipselects : Contains the number of the chipselect
38
33Example: 39Example:
34 40
35ecspi@70010000 { 41ecspi@70010000 {
diff --git a/Makefile b/Makefile
index 3f4d157add54..7e02f951b284 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
2VERSION = 4 2VERSION = 4
3PATCHLEVEL = 15 3PATCHLEVEL = 15
4SUBLEVEL = 0 4SUBLEVEL = 0
5EXTRAVERSION = -rc3 5EXTRAVERSION = -rc4
6NAME = Fearless Coyote 6NAME = Fearless Coyote
7 7
8# *DOCUMENTATION* 8# *DOCUMENTATION*
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 1712f132b80d..b83fdc06286a 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -85,7 +85,11 @@
85 .pushsection .text.fixup,"ax" 85 .pushsection .text.fixup,"ax"
86 .align 4 86 .align 4
879001: mov r4, #-EFAULT 879001: mov r4, #-EFAULT
88#ifdef CONFIG_CPU_SW_DOMAIN_PAN
89 ldr r5, [sp, #9*4] @ *err_ptr
90#else
88 ldr r5, [sp, #8*4] @ *err_ptr 91 ldr r5, [sp, #8*4] @ *err_ptr
92#endif
89 str r4, [r5] 93 str r4, [r5]
90 ldmia sp, {r1, r2} @ retrieve dst, len 94 ldmia sp, {r1, r2} @ retrieve dst, len
91 add r2, r2, r1 95 add r2, r2, r1
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 321c9c05dd9e..f4363d40e2cd 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
74{ 74{
75 u64 reg; 75 u64 reg;
76 76
77 /* Clear pmscr in case of early return */
78 *pmscr_el1 = 0;
79
77 /* SPE present on this CPU? */ 80 /* SPE present on this CPU? */
78 if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), 81 if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
79 ID_AA64DFR0_PMSVER_SHIFT)) 82 ID_AA64DFR0_PMSVER_SHIFT))
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index 9345b44b86f0..f57118e1f6b4 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -123,8 +123,8 @@ int puts(const char *s)
123 while ((nuline = strchr(s, '\n')) != NULL) { 123 while ((nuline = strchr(s, '\n')) != NULL) {
124 if (nuline != s) 124 if (nuline != s)
125 pdc_iodc_print(s, nuline - s); 125 pdc_iodc_print(s, nuline - s);
126 pdc_iodc_print("\r\n", 2); 126 pdc_iodc_print("\r\n", 2);
127 s = nuline + 1; 127 s = nuline + 1;
128 } 128 }
129 if (*s != '\0') 129 if (*s != '\0')
130 pdc_iodc_print(s, strlen(s)); 130 pdc_iodc_print(s, strlen(s));
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index c980a02a52bc..598c8d60fa5e 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -35,7 +35,12 @@ struct thread_info {
35 35
36/* thread information allocation */ 36/* thread information allocation */
37 37
38#ifdef CONFIG_IRQSTACKS
39#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */
40#else
38#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */ 41#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */
42#endif
43
39/* Be sure to hunt all references to this down when you change the size of 44/* Be sure to hunt all references to this down when you change the size of
40 * the kernel stack */ 45 * the kernel stack */
41#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) 46#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index a4fd296c958e..f3cecf5117cf 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
878 STREG %r19,PT_SR7(%r16) 878 STREG %r19,PT_SR7(%r16)
879 879
880intr_return: 880intr_return:
881 /* NOTE: Need to enable interrupts incase we schedule. */
882 ssm PSW_SM_I, %r0
883
884 /* check for reschedule */ 881 /* check for reschedule */
885 mfctl %cr30,%r1 882 mfctl %cr30,%r1
886 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */ 883 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +904,11 @@ intr_check_sig:
907 LDREG PT_IASQ1(%r16), %r20 904 LDREG PT_IASQ1(%r16), %r20
908 cmpib,COND(=),n 0,%r20,intr_restore /* backward */ 905 cmpib,COND(=),n 0,%r20,intr_restore /* backward */
909 906
907 /* NOTE: We need to enable interrupts if we have to deliver
908 * signals. We used to do this earlier but it caused kernel
909 * stack overflows. */
910 ssm PSW_SM_I, %r0
911
910 copy %r0, %r25 /* long in_syscall = 0 */ 912 copy %r0, %r25 /* long in_syscall = 0 */
911#ifdef CONFIG_64BIT 913#ifdef CONFIG_64BIT
912 ldo -16(%r30),%r29 /* Reference param save area */ 914 ldo -16(%r30),%r29 /* Reference param save area */
@@ -958,6 +960,10 @@ intr_do_resched:
958 cmpib,COND(=) 0, %r20, intr_do_preempt 960 cmpib,COND(=) 0, %r20, intr_do_preempt
959 nop 961 nop
960 962
963 /* NOTE: We need to enable interrupts if we schedule. We used
964 * to do this earlier but it caused kernel stack overflows. */
965 ssm PSW_SM_I, %r0
966
961#ifdef CONFIG_64BIT 967#ifdef CONFIG_64BIT
962 ldo -16(%r30),%r29 /* Reference param save area */ 968 ldo -16(%r30),%r29 /* Reference param save area */
963#endif 969#endif
diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S
index e3a8e5e4d5de..8d072c44f300 100644
--- a/arch/parisc/kernel/hpmc.S
+++ b/arch/parisc/kernel/hpmc.S
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
305 305
306 306
307 __INITRODATA 307 __INITRODATA
308 .align 4
308 .export os_hpmc_size 309 .export os_hpmc_size
309os_hpmc_size: 310os_hpmc_size:
310 .word .os_hpmc_end-.os_hpmc 311 .word .os_hpmc_end-.os_hpmc
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 5a657986ebbf..143f90e2f9f3 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -15,7 +15,6 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/kallsyms.h> 16#include <linux/kallsyms.h>
17#include <linux/sort.h> 17#include <linux/sort.h>
18#include <linux/sched.h>
19 18
20#include <linux/uaccess.h> 19#include <linux/uaccess.h>
21#include <asm/assembly.h> 20#include <asm/assembly.h>
diff --git a/arch/parisc/lib/delay.c b/arch/parisc/lib/delay.c
index 7eab4bb8abe6..66e506520505 100644
--- a/arch/parisc/lib/delay.c
+++ b/arch/parisc/lib/delay.c
@@ -16,9 +16,7 @@
16#include <linux/preempt.h> 16#include <linux/preempt.h>
17#include <linux/init.h> 17#include <linux/init.h>
18 18
19#include <asm/processor.h>
20#include <asm/delay.h> 19#include <asm/delay.h>
21
22#include <asm/special_insns.h> /* for mfctl() */ 20#include <asm/special_insns.h> /* for mfctl() */
23#include <asm/processor.h> /* for boot_cpu_data */ 21#include <asm/processor.h> /* for boot_cpu_data */
24 22
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index d5a5bc43cf8f..6771c63b2bec 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -763,7 +763,8 @@ emit_clear:
763 func = (u8 *) __bpf_call_base + imm; 763 func = (u8 *) __bpf_call_base + imm;
764 764
765 /* Save skb pointer if we need to re-cache skb data */ 765 /* Save skb pointer if we need to re-cache skb data */
766 if (bpf_helper_changes_pkt_data(func)) 766 if ((ctx->seen & SEEN_SKB) &&
767 bpf_helper_changes_pkt_data(func))
767 PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); 768 PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
768 769
769 bpf_jit_emit_func_call(image, ctx, (u64)func); 770 bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ emit_clear:
772 PPC_MR(b2p[BPF_REG_0], 3); 773 PPC_MR(b2p[BPF_REG_0], 3);
773 774
774 /* refresh skb cache */ 775 /* refresh skb cache */
775 if (bpf_helper_changes_pkt_data(func)) { 776 if ((ctx->seen & SEEN_SKB) &&
777 bpf_helper_changes_pkt_data(func)) {
776 /* reload skb pointer to r3 */ 778 /* reload skb pointer to r3 */
777 PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); 779 PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
778 bpf_jit_emit_skb_loads(image, ctx); 780 bpf_jit_emit_skb_loads(image, ctx);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index f4baa8c514d3..1dfadbd126f3 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -55,8 +55,7 @@ struct bpf_jit {
55#define SEEN_LITERAL 8 /* code uses literals */ 55#define SEEN_LITERAL 8 /* code uses literals */
56#define SEEN_FUNC 16 /* calls C functions */ 56#define SEEN_FUNC 16 /* calls C functions */
57#define SEEN_TAIL_CALL 32 /* code uses tail calls */ 57#define SEEN_TAIL_CALL 32 /* code uses tail calls */
58#define SEEN_SKB_CHANGE 64 /* code changes skb data */ 58#define SEEN_REG_AX 64 /* code uses constant blinding */
59#define SEEN_REG_AX 128 /* code uses constant blinding */
60#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) 59#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
61 60
62/* 61/*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
448 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, 447 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
449 REG_15, 152); 448 REG_15, 152);
450 } 449 }
451 if (jit->seen & SEEN_SKB) 450 if (jit->seen & SEEN_SKB) {
452 emit_load_skb_data_hlen(jit); 451 emit_load_skb_data_hlen(jit);
453 if (jit->seen & SEEN_SKB_CHANGE)
454 /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ 452 /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
455 EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, 453 EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
456 STK_OFF_SKBP); 454 STK_OFF_SKBP);
455 }
457} 456}
458 457
459/* 458/*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
983 EMIT2(0x0d00, REG_14, REG_W1); 982 EMIT2(0x0d00, REG_14, REG_W1);
984 /* lgr %b0,%r2: load return value into %b0 */ 983 /* lgr %b0,%r2: load return value into %b0 */
985 EMIT4(0xb9040000, BPF_REG_0, REG_2); 984 EMIT4(0xb9040000, BPF_REG_0, REG_2);
986 if (bpf_helper_changes_pkt_data((void *)func)) { 985 if ((jit->seen & SEEN_SKB) &&
987 jit->seen |= SEEN_SKB_CHANGE; 986 bpf_helper_changes_pkt_data((void *)func)) {
988 /* lg %b1,ST_OFF_SKBP(%r15) */ 987 /* lg %b1,ST_OFF_SKBP(%r15) */
989 EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0, 988 EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
990 REG_15, STK_OFF_SKBP); 989 REG_15, STK_OFF_SKBP);
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index be3136f142a9..a8103a84b4ac 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
113 if (!printk_ratelimit()) 113 if (!printk_ratelimit())
114 return; 114 return;
115 115
116 printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", 116 printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
117 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 117 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
118 tsk->comm, task_pid_nr(tsk), address, 118 tsk->comm, task_pid_nr(tsk), address,
119 (void *)regs->pc, (void *)regs->u_regs[UREG_I7], 119 (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 815c03d7a765..41363f46797b 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
154 if (!printk_ratelimit()) 154 if (!printk_ratelimit())
155 return; 155 return;
156 156
157 printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", 157 printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
158 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 158 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
159 tsk->comm, task_pid_nr(tsk), address, 159 tsk->comm, task_pid_nr(tsk), address,
160 (void *)regs->tpc, (void *)regs->u_regs[UREG_I7], 160 (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index a2f1b5e774a7..22aff21fa44d 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1245 u8 *func = ((u8 *)__bpf_call_base) + imm; 1245 u8 *func = ((u8 *)__bpf_call_base) + imm;
1246 1246
1247 ctx->saw_call = true; 1247 ctx->saw_call = true;
1248 if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
1249 emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
1248 1250
1249 emit_call((u32 *)func, ctx); 1251 emit_call((u32 *)func, ctx);
1250 emit_nop(ctx); 1252 emit_nop(ctx);
1251 1253
1252 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); 1254 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
1253 1255
1254 if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind) 1256 if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
1255 load_skb_regs(ctx, bpf2sparc[BPF_REG_6]); 1257 load_skb_regs(ctx, L7);
1256 break; 1258 break;
1257 } 1259 }
1258 1260
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 4e6fcb32620f..428644175956 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
150 if (!printk_ratelimit()) 150 if (!printk_ratelimit())
151 return; 151 return;
152 152
153 printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x", 153 printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
154 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 154 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
155 tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi), 155 tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
156 (void *)UPT_IP(regs), (void *)UPT_SP(regs), 156 (void *)UPT_IP(regs), (void *)UPT_SP(regs),
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4838037f97f6..bd8b57a5c874 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -941,7 +941,8 @@ ENTRY(debug)
941 movl %esp, %eax # pt_regs pointer 941 movl %esp, %eax # pt_regs pointer
942 942
943 /* Are we currently on the SYSENTER stack? */ 943 /* Are we currently on the SYSENTER stack? */
944 PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) 944 movl PER_CPU_VAR(cpu_entry_area), %ecx
945 addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
945 subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ 946 subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
946 cmpl $SIZEOF_SYSENTER_stack, %ecx 947 cmpl $SIZEOF_SYSENTER_stack, %ecx
947 jb .Ldebug_from_sysenter_stack 948 jb .Ldebug_from_sysenter_stack
@@ -984,7 +985,8 @@ ENTRY(nmi)
984 movl %esp, %eax # pt_regs pointer 985 movl %esp, %eax # pt_regs pointer
985 986
986 /* Are we currently on the SYSENTER stack? */ 987 /* Are we currently on the SYSENTER stack? */
987 PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) 988 movl PER_CPU_VAR(cpu_entry_area), %ecx
989 addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
988 subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ 990 subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
989 cmpl $SIZEOF_SYSENTER_stack, %ecx 991 cmpl $SIZEOF_SYSENTER_stack, %ecx
990 jb .Lnmi_from_sysenter_stack 992 jb .Lnmi_from_sysenter_stack
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f81d50d7ceac..423885bee398 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -140,6 +140,64 @@ END(native_usergs_sysret64)
140 * with them due to bugs in both AMD and Intel CPUs. 140 * with them due to bugs in both AMD and Intel CPUs.
141 */ 141 */
142 142
143 .pushsection .entry_trampoline, "ax"
144
145/*
146 * The code in here gets remapped into cpu_entry_area's trampoline. This means
147 * that the assembler and linker have the wrong idea as to where this code
148 * lives (and, in fact, it's mapped more than once, so it's not even at a
149 * fixed address). So we can't reference any symbols outside the entry
150 * trampoline and expect it to work.
151 *
152 * Instead, we carefully abuse %rip-relative addressing.
153 * _entry_trampoline(%rip) refers to the start of the remapped) entry
154 * trampoline. We can thus find cpu_entry_area with this macro:
155 */
156
157#define CPU_ENTRY_AREA \
158 _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
159
160/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
161#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \
162 SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
163
164ENTRY(entry_SYSCALL_64_trampoline)
165 UNWIND_HINT_EMPTY
166 swapgs
167
168 /* Stash the user RSP. */
169 movq %rsp, RSP_SCRATCH
170
171 /* Load the top of the task stack into RSP */
172 movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
173
174 /* Start building the simulated IRET frame. */
175 pushq $__USER_DS /* pt_regs->ss */
176 pushq RSP_SCRATCH /* pt_regs->sp */
177 pushq %r11 /* pt_regs->flags */
178 pushq $__USER_CS /* pt_regs->cs */
179 pushq %rcx /* pt_regs->ip */
180
181 /*
182 * x86 lacks a near absolute jump, and we can't jump to the real
183 * entry text with a relative jump. We could push the target
184 * address and then use retq, but this destroys the pipeline on
185 * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
186 * spill RDI and restore it in a second-stage trampoline.
187 */
188 pushq %rdi
189 movq $entry_SYSCALL_64_stage2, %rdi
190 jmp *%rdi
191END(entry_SYSCALL_64_trampoline)
192
193 .popsection
194
195ENTRY(entry_SYSCALL_64_stage2)
196 UNWIND_HINT_EMPTY
197 popq %rdi
198 jmp entry_SYSCALL_64_after_hwframe
199END(entry_SYSCALL_64_stage2)
200
143ENTRY(entry_SYSCALL_64) 201ENTRY(entry_SYSCALL_64)
144 UNWIND_HINT_EMPTY 202 UNWIND_HINT_EMPTY
145 /* 203 /*
@@ -330,8 +388,24 @@ syscall_return_via_sysret:
330 popq %rsi /* skip rcx */ 388 popq %rsi /* skip rcx */
331 popq %rdx 389 popq %rdx
332 popq %rsi 390 popq %rsi
391
392 /*
393 * Now all regs are restored except RSP and RDI.
394 * Save old stack pointer and switch to trampoline stack.
395 */
396 movq %rsp, %rdi
397 movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
398
399 pushq RSP-RDI(%rdi) /* RSP */
400 pushq (%rdi) /* RDI */
401
402 /*
403 * We are on the trampoline stack. All regs except RDI are live.
404 * We can do future final exit work right here.
405 */
406
333 popq %rdi 407 popq %rdi
334 movq RSP-ORIG_RAX(%rsp), %rsp 408 popq %rsp
335 USERGS_SYSRET64 409 USERGS_SYSRET64
336END(entry_SYSCALL_64) 410END(entry_SYSCALL_64)
337 411
@@ -466,12 +540,13 @@ END(irq_entries_start)
466 540
467.macro DEBUG_ENTRY_ASSERT_IRQS_OFF 541.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
468#ifdef CONFIG_DEBUG_ENTRY 542#ifdef CONFIG_DEBUG_ENTRY
469 pushfq 543 pushq %rax
470 testl $X86_EFLAGS_IF, (%rsp) 544 SAVE_FLAGS(CLBR_RAX)
545 testl $X86_EFLAGS_IF, %eax
471 jz .Lokay_\@ 546 jz .Lokay_\@
472 ud2 547 ud2
473.Lokay_\@: 548.Lokay_\@:
474 addq $8, %rsp 549 popq %rax
475#endif 550#endif
476.endm 551.endm
477 552
@@ -563,6 +638,13 @@ END(irq_entries_start)
563/* 0(%rsp): ~(interrupt number) */ 638/* 0(%rsp): ~(interrupt number) */
564 .macro interrupt func 639 .macro interrupt func
565 cld 640 cld
641
642 testb $3, CS-ORIG_RAX(%rsp)
643 jz 1f
644 SWAPGS
645 call switch_to_thread_stack
6461:
647
566 ALLOC_PT_GPREGS_ON_STACK 648 ALLOC_PT_GPREGS_ON_STACK
567 SAVE_C_REGS 649 SAVE_C_REGS
568 SAVE_EXTRA_REGS 650 SAVE_EXTRA_REGS
@@ -572,12 +654,8 @@ END(irq_entries_start)
572 jz 1f 654 jz 1f
573 655
574 /* 656 /*
575 * IRQ from user mode. Switch to kernel gsbase and inform context 657 * IRQ from user mode.
576 * tracking that we're in kernel mode. 658 *
577 */
578 SWAPGS
579
580 /*
581 * We need to tell lockdep that IRQs are off. We can't do this until 659 * We need to tell lockdep that IRQs are off. We can't do this until
582 * we fix gsbase, and we should do it before enter_from_user_mode 660 * we fix gsbase, and we should do it before enter_from_user_mode
583 * (which can take locks). Since TRACE_IRQS_OFF idempotent, 661 * (which can take locks). Since TRACE_IRQS_OFF idempotent,
@@ -630,10 +708,41 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
630 ud2 708 ud2
6311: 7091:
632#endif 710#endif
633 SWAPGS
634 POP_EXTRA_REGS 711 POP_EXTRA_REGS
635 POP_C_REGS 712 popq %r11
636 addq $8, %rsp /* skip regs->orig_ax */ 713 popq %r10
714 popq %r9
715 popq %r8
716 popq %rax
717 popq %rcx
718 popq %rdx
719 popq %rsi
720
721 /*
722 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
723 * Save old stack pointer and switch to trampoline stack.
724 */
725 movq %rsp, %rdi
726 movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
727
728 /* Copy the IRET frame to the trampoline stack. */
729 pushq 6*8(%rdi) /* SS */
730 pushq 5*8(%rdi) /* RSP */
731 pushq 4*8(%rdi) /* EFLAGS */
732 pushq 3*8(%rdi) /* CS */
733 pushq 2*8(%rdi) /* RIP */
734
735 /* Push user RDI on the trampoline stack. */
736 pushq (%rdi)
737
738 /*
739 * We are on the trampoline stack. All regs except RDI are live.
740 * We can do future final exit work right here.
741 */
742
743 /* Restore RDI. */
744 popq %rdi
745 SWAPGS
637 INTERRUPT_RETURN 746 INTERRUPT_RETURN
638 747
639 748
@@ -829,7 +938,33 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
829/* 938/*
830 * Exception entry points. 939 * Exception entry points.
831 */ 940 */
832#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) 941#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
942
943/*
944 * Switch to the thread stack. This is called with the IRET frame and
945 * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
946 * space has not been allocated for them.)
947 */
948ENTRY(switch_to_thread_stack)
949 UNWIND_HINT_FUNC
950
951 pushq %rdi
952 movq %rsp, %rdi
953 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
954 UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
955
956 pushq 7*8(%rdi) /* regs->ss */
957 pushq 6*8(%rdi) /* regs->rsp */
958 pushq 5*8(%rdi) /* regs->eflags */
959 pushq 4*8(%rdi) /* regs->cs */
960 pushq 3*8(%rdi) /* regs->ip */
961 pushq 2*8(%rdi) /* regs->orig_ax */
962 pushq 8(%rdi) /* return address */
963 UNWIND_HINT_FUNC
964
965 movq (%rdi), %rdi
966 ret
967END(switch_to_thread_stack)
833 968
834.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 969.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
835ENTRY(\sym) 970ENTRY(\sym)
@@ -848,11 +983,12 @@ ENTRY(\sym)
848 983
849 ALLOC_PT_GPREGS_ON_STACK 984 ALLOC_PT_GPREGS_ON_STACK
850 985
851 .if \paranoid 986 .if \paranoid < 2
852 .if \paranoid == 1
853 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ 987 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
854 jnz 1f 988 jnz .Lfrom_usermode_switch_stack_\@
855 .endif 989 .endif
990
991 .if \paranoid
856 call paranoid_entry 992 call paranoid_entry
857 .else 993 .else
858 call error_entry 994 call error_entry
@@ -894,20 +1030,15 @@ ENTRY(\sym)
894 jmp error_exit 1030 jmp error_exit
895 .endif 1031 .endif
896 1032
897 .if \paranoid == 1 1033 .if \paranoid < 2
898 /* 1034 /*
899 * Paranoid entry from userspace. Switch stacks and treat it 1035 * Entry from userspace. Switch stacks and treat it
900 * as a normal entry. This means that paranoid handlers 1036 * as a normal entry. This means that paranoid handlers
901 * run in real process context if user_mode(regs). 1037 * run in real process context if user_mode(regs).
902 */ 1038 */
9031: 1039.Lfrom_usermode_switch_stack_\@:
904 call error_entry 1040 call error_entry
905 1041
906
907 movq %rsp, %rdi /* pt_regs pointer */
908 call sync_regs
909 movq %rax, %rsp /* switch stack */
910
911 movq %rsp, %rdi /* pt_regs pointer */ 1042 movq %rsp, %rdi /* pt_regs pointer */
912 1043
913 .if \has_error_code 1044 .if \has_error_code
@@ -1170,6 +1301,14 @@ ENTRY(error_entry)
1170 SWAPGS 1301 SWAPGS
1171 1302
1172.Lerror_entry_from_usermode_after_swapgs: 1303.Lerror_entry_from_usermode_after_swapgs:
1304 /* Put us onto the real thread stack. */
1305 popq %r12 /* save return addr in %12 */
1306 movq %rsp, %rdi /* arg0 = pt_regs pointer */
1307 call sync_regs
1308 movq %rax, %rsp /* switch stack */
1309 ENCODE_FRAME_POINTER
1310 pushq %r12
1311
1173 /* 1312 /*
1174 * We need to tell lockdep that IRQs are off. We can't do this until 1313 * We need to tell lockdep that IRQs are off. We can't do this until
1175 * we fix gsbase, and we should do it before enter_from_user_mode 1314 * we fix gsbase, and we should do it before enter_from_user_mode
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 568e130d932c..95ad40eb7eff 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -48,7 +48,7 @@
48 */ 48 */
49ENTRY(entry_SYSENTER_compat) 49ENTRY(entry_SYSENTER_compat)
50 /* Interrupts are off on entry. */ 50 /* Interrupts are off on entry. */
51 SWAPGS_UNSAFE_STACK 51 SWAPGS
52 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 52 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
53 53
54 /* 54 /*
@@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
306 */ 306 */
307 movl %eax, %eax 307 movl %eax, %eax
308 308
309 /* Construct struct pt_regs on stack (iret frame is already on stack) */
310 pushq %rax /* pt_regs->orig_ax */ 309 pushq %rax /* pt_regs->orig_ax */
310
311 /* switch to thread stack expects orig_ax to be pushed */
312 call switch_to_thread_stack
313
311 pushq %rdi /* pt_regs->di */ 314 pushq %rdi /* pt_regs->di */
312 pushq %rsi /* pt_regs->si */ 315 pushq %rsi /* pt_regs->si */
313 pushq %rdx /* pt_regs->dx */ 316 pushq %rdx /* pt_regs->dx */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bf6a76202a77..ea9a7dde62e5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
135 set_bit(bit, (unsigned long *)cpu_caps_set); \ 135 set_bit(bit, (unsigned long *)cpu_caps_set); \
136} while (0) 136} while (0)
137 137
138#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
139
138#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) 140#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
139/* 141/*
140 * Static testing of CPU features. Used the same as boot_cpu_has(). 142 * Static testing of CPU features. Used the same as boot_cpu_has().
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4011cb03ef08..aab4fe9f49f8 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -60,17 +60,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
60 return this_cpu_ptr(&gdt_page)->gdt; 60 return this_cpu_ptr(&gdt_page)->gdt;
61} 61}
62 62
63/* Get the fixmap index for a specific processor */
64static inline unsigned int get_cpu_gdt_ro_index(int cpu)
65{
66 return FIX_GDT_REMAP_BEGIN + cpu;
67}
68
69/* Provide the fixmap address of the remapped GDT */ 63/* Provide the fixmap address of the remapped GDT */
70static inline struct desc_struct *get_cpu_gdt_ro(int cpu) 64static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
71{ 65{
72 unsigned int idx = get_cpu_gdt_ro_index(cpu); 66 return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
73 return (struct desc_struct *)__fix_to_virt(idx);
74} 67}
75 68
76/* Provide the current read-only GDT */ 69/* Provide the current read-only GDT */
@@ -185,7 +178,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
185#endif 178#endif
186} 179}
187 180
188static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) 181static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
189{ 182{
190 struct desc_struct *d = get_cpu_gdt_rw(cpu); 183 struct desc_struct *d = get_cpu_gdt_rw(cpu);
191 tss_desc tss; 184 tss_desc tss;
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index b0c505fe9a95..94fc4fa14127 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -44,6 +44,45 @@ extern unsigned long __FIXADDR_TOP;
44 PAGE_SIZE) 44 PAGE_SIZE)
45#endif 45#endif
46 46
47/*
48 * cpu_entry_area is a percpu region in the fixmap that contains things
49 * needed by the CPU and early entry/exit code. Real types aren't used
50 * for all fields here to avoid circular header dependencies.
51 *
52 * Every field is a virtual alias of some other allocated backing store.
53 * There is no direct allocation of a struct cpu_entry_area.
54 */
55struct cpu_entry_area {
56 char gdt[PAGE_SIZE];
57
58 /*
59 * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
60 * a a read-only guard page.
61 */
62 struct SYSENTER_stack_page SYSENTER_stack_page;
63
64 /*
65 * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
66 * we need task switches to work, and task switches write to the TSS.
67 */
68 struct tss_struct tss;
69
70 char entry_trampoline[PAGE_SIZE];
71
72#ifdef CONFIG_X86_64
73 /*
74 * Exception stacks used for IST entries.
75 *
76 * In the future, this should have a separate slot for each stack
77 * with guard pages between them.
78 */
79 char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
80#endif
81};
82
83#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
84
85extern void setup_cpu_entry_areas(void);
47 86
48/* 87/*
49 * Here we define all the compile-time 'special' virtual 88 * Here we define all the compile-time 'special' virtual
@@ -101,8 +140,8 @@ enum fixed_addresses {
101 FIX_LNW_VRTC, 140 FIX_LNW_VRTC,
102#endif 141#endif
103 /* Fixmap entries to remap the GDTs, one per processor. */ 142 /* Fixmap entries to remap the GDTs, one per processor. */
104 FIX_GDT_REMAP_BEGIN, 143 FIX_CPU_ENTRY_AREA_TOP,
105 FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1, 144 FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
106 145
107#ifdef CONFIG_ACPI_APEI_GHES 146#ifdef CONFIG_ACPI_APEI_GHES
108 /* Used for GHES mapping from assorted contexts */ 147 /* Used for GHES mapping from assorted contexts */
@@ -191,5 +230,30 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
191void __early_set_fixmap(enum fixed_addresses idx, 230void __early_set_fixmap(enum fixed_addresses idx,
192 phys_addr_t phys, pgprot_t flags); 231 phys_addr_t phys, pgprot_t flags);
193 232
233static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
234{
235 BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
236
237 return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
238}
239
240#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \
241 BUILD_BUG_ON(offset % PAGE_SIZE != 0); \
242 __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \
243 })
244
245#define get_cpu_entry_area_index(cpu, field) \
246 __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
247
248static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
249{
250 return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
251}
252
253static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
254{
255 return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
256}
257
194#endif /* !__ASSEMBLY__ */ 258#endif /* !__ASSEMBLY__ */
195#endif /* _ASM_X86_FIXMAP_H */ 259#endif /* _ASM_X86_FIXMAP_H */
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 1b0a5abcd8ae..96aa6b9884dc 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -20,16 +20,7 @@
20#ifndef _ASM_X86_HYPERVISOR_H 20#ifndef _ASM_X86_HYPERVISOR_H
21#define _ASM_X86_HYPERVISOR_H 21#define _ASM_X86_HYPERVISOR_H
22 22
23#ifdef CONFIG_HYPERVISOR_GUEST 23/* x86 hypervisor types */
24
25#include <asm/kvm_para.h>
26#include <asm/x86_init.h>
27#include <asm/xen/hypervisor.h>
28
29/*
30 * x86 hypervisor information
31 */
32
33enum x86_hypervisor_type { 24enum x86_hypervisor_type {
34 X86_HYPER_NATIVE = 0, 25 X86_HYPER_NATIVE = 0,
35 X86_HYPER_VMWARE, 26 X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
39 X86_HYPER_KVM, 30 X86_HYPER_KVM,
40}; 31};
41 32
33#ifdef CONFIG_HYPERVISOR_GUEST
34
35#include <asm/kvm_para.h>
36#include <asm/x86_init.h>
37#include <asm/xen/hypervisor.h>
38
42struct hypervisor_x86 { 39struct hypervisor_x86 {
43 /* Hypervisor name */ 40 /* Hypervisor name */
44 const char *name; 41 const char *name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
58 55
59extern enum x86_hypervisor_type x86_hyper_type; 56extern enum x86_hypervisor_type x86_hyper_type;
60extern void init_hypervisor_platform(void); 57extern void init_hypervisor_platform(void);
58static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
59{
60 return x86_hyper_type == type;
61}
61#else 62#else
62static inline void init_hypervisor_platform(void) { } 63static inline void init_hypervisor_platform(void) { }
64static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
65{
66 return type == X86_HYPER_NATIVE;
67}
63#endif /* CONFIG_HYPERVISOR_GUEST */ 68#endif /* CONFIG_HYPERVISOR_GUEST */
64#endif /* _ASM_X86_HYPERVISOR_H */ 69#endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c8ef23f2c28f..89f08955fff7 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
142 swapgs; \ 142 swapgs; \
143 sysretl 143 sysretl
144 144
145#ifdef CONFIG_DEBUG_ENTRY
146#define SAVE_FLAGS(x) pushfq; popq %rax
147#endif
145#else 148#else
146#define INTERRUPT_RETURN iret 149#define INTERRUPT_RETURN iret
147#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit 150#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index f86a8caa561e..395c9631e000 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
26extern int __must_check __die(const char *, struct pt_regs *, long); 26extern int __must_check __die(const char *, struct pt_regs *, long);
27extern void show_stack_regs(struct pt_regs *regs); 27extern void show_stack_regs(struct pt_regs *regs);
28extern void __show_regs(struct pt_regs *regs, int all); 28extern void __show_regs(struct pt_regs *regs, int all);
29extern void show_iret_regs(struct pt_regs *regs);
29extern unsigned long oops_begin(void); 30extern unsigned long oops_begin(void);
30extern void oops_end(unsigned long, struct pt_regs *, int signr); 31extern void oops_end(unsigned long, struct pt_regs *, int signr);
31 32
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 283efcaac8af..892df375b615 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -927,6 +927,15 @@ extern void default_banner(void);
927 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ 927 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
928 CLBR_NONE, \ 928 CLBR_NONE, \
929 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) 929 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
930
931#ifdef CONFIG_DEBUG_ENTRY
932#define SAVE_FLAGS(clobbers) \
933 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
934 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
935 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
936 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
937#endif
938
930#endif /* CONFIG_X86_32 */ 939#endif /* CONFIG_X86_32 */
931 940
932#endif /* __ASSEMBLY__ */ 941#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cc16fa882e3e..1f2434ee9f80 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
163extern struct cpuinfo_x86 boot_cpu_data; 163extern struct cpuinfo_x86 boot_cpu_data;
164extern struct cpuinfo_x86 new_cpu_data; 164extern struct cpuinfo_x86 new_cpu_data;
165 165
166extern struct tss_struct doublefault_tss; 166extern struct x86_hw_tss doublefault_tss;
167extern __u32 cpu_caps_cleared[NCAPINTS]; 167extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
168extern __u32 cpu_caps_set[NCAPINTS]; 168extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
169 169
170#ifdef CONFIG_SMP 170#ifdef CONFIG_SMP
171DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); 171DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
253 write_cr3(__sme_pa(pgdir)); 253 write_cr3(__sme_pa(pgdir));
254} 254}
255 255
256/*
257 * Note that while the legacy 'TSS' name comes from 'Task State Segment',
258 * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
259 * unrelated to the task-switch mechanism:
260 */
256#ifdef CONFIG_X86_32 261#ifdef CONFIG_X86_32
257/* This is the TSS defined by the hardware. */ 262/* This is the TSS defined by the hardware. */
258struct x86_hw_tss { 263struct x86_hw_tss {
@@ -305,7 +310,13 @@ struct x86_hw_tss {
305struct x86_hw_tss { 310struct x86_hw_tss {
306 u32 reserved1; 311 u32 reserved1;
307 u64 sp0; 312 u64 sp0;
313
314 /*
315 * We store cpu_current_top_of_stack in sp1 so it's always accessible.
316 * Linux does not use ring 1, so sp1 is not otherwise needed.
317 */
308 u64 sp1; 318 u64 sp1;
319
309 u64 sp2; 320 u64 sp2;
310 u64 reserved2; 321 u64 reserved2;
311 u64 ist[7]; 322 u64 ist[7];
@@ -323,12 +334,22 @@ struct x86_hw_tss {
323#define IO_BITMAP_BITS 65536 334#define IO_BITMAP_BITS 65536
324#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) 335#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
325#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) 336#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
326#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) 337#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
327#define INVALID_IO_BITMAP_OFFSET 0x8000 338#define INVALID_IO_BITMAP_OFFSET 0x8000
328 339
340struct SYSENTER_stack {
341 unsigned long words[64];
342};
343
344struct SYSENTER_stack_page {
345 struct SYSENTER_stack stack;
346} __aligned(PAGE_SIZE);
347
329struct tss_struct { 348struct tss_struct {
330 /* 349 /*
331 * The hardware state: 350 * The fixed hardware portion. This must not cross a page boundary
351 * at risk of violating the SDM's advice and potentially triggering
352 * errata.
332 */ 353 */
333 struct x86_hw_tss x86_tss; 354 struct x86_hw_tss x86_tss;
334 355
@@ -339,18 +360,9 @@ struct tss_struct {
339 * be within the limit. 360 * be within the limit.
340 */ 361 */
341 unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; 362 unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
363} __aligned(PAGE_SIZE);
342 364
343#ifdef CONFIG_X86_32 365DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
344 /*
345 * Space for the temporary SYSENTER stack.
346 */
347 unsigned long SYSENTER_stack_canary;
348 unsigned long SYSENTER_stack[64];
349#endif
350
351} ____cacheline_aligned;
352
353DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
354 366
355/* 367/*
356 * sizeof(unsigned long) coming from an extra "long" at the end 368 * sizeof(unsigned long) coming from an extra "long" at the end
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
364 376
365#ifdef CONFIG_X86_32 377#ifdef CONFIG_X86_32
366DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); 378DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
379#else
380/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
381#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
367#endif 382#endif
368 383
369/* 384/*
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
523static inline void 538static inline void
524native_load_sp0(unsigned long sp0) 539native_load_sp0(unsigned long sp0)
525{ 540{
526 this_cpu_write(cpu_tss.x86_tss.sp0, sp0); 541 this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
527} 542}
528 543
529static inline void native_swapgs(void) 544static inline void native_swapgs(void)
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
535 550
536static inline unsigned long current_top_of_stack(void) 551static inline unsigned long current_top_of_stack(void)
537{ 552{
538#ifdef CONFIG_X86_64 553 /*
539 return this_cpu_read_stable(cpu_tss.x86_tss.sp0); 554 * We can't read directly from tss.sp0: sp0 on x86_32 is special in
540#else 555 * and around vm86 mode and sp0 on x86_64 is special because of the
541 /* sp0 on x86_32 is special in and around vm86 mode. */ 556 * entry trampoline.
557 */
542 return this_cpu_read_stable(cpu_current_top_of_stack); 558 return this_cpu_read_stable(cpu_current_top_of_stack);
543#endif
544} 559}
545 560
546static inline bool on_thread_stack(void) 561static inline bool on_thread_stack(void)
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 8da111b3c342..f8062bfd43a0 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -16,6 +16,7 @@ enum stack_type {
16 STACK_TYPE_TASK, 16 STACK_TYPE_TASK,
17 STACK_TYPE_IRQ, 17 STACK_TYPE_IRQ,
18 STACK_TYPE_SOFTIRQ, 18 STACK_TYPE_SOFTIRQ,
19 STACK_TYPE_SYSENTER,
19 STACK_TYPE_EXCEPTION, 20 STACK_TYPE_EXCEPTION,
20 STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, 21 STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
21}; 22};
@@ -28,6 +29,8 @@ struct stack_info {
28bool in_task_stack(unsigned long *stack, struct task_struct *task, 29bool in_task_stack(unsigned long *stack, struct task_struct *task,
29 struct stack_info *info); 30 struct stack_info *info);
30 31
32bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
33
31int get_stack_info(unsigned long *stack, struct task_struct *task, 34int get_stack_info(unsigned long *stack, struct task_struct *task,
32 struct stack_info *info, unsigned long *visit_mask); 35 struct stack_info *info, unsigned long *visit_mask);
33 36
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8c6bd6863db9..9b6df68d8fd1 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -79,10 +79,10 @@ do { \
79static inline void refresh_sysenter_cs(struct thread_struct *thread) 79static inline void refresh_sysenter_cs(struct thread_struct *thread)
80{ 80{
81 /* Only happens when SEP is enabled, no need to test "SEP"arately: */ 81 /* Only happens when SEP is enabled, no need to test "SEP"arately: */
82 if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs)) 82 if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
83 return; 83 return;
84 84
85 this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs); 85 this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
86 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); 86 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
87} 87}
88#endif 88#endif
@@ -90,10 +90,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
90/* This is used when switching tasks or entering/exiting vm86 mode. */ 90/* This is used when switching tasks or entering/exiting vm86 mode. */
91static inline void update_sp0(struct task_struct *task) 91static inline void update_sp0(struct task_struct *task)
92{ 92{
93 /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
93#ifdef CONFIG_X86_32 94#ifdef CONFIG_X86_32
94 load_sp0(task->thread.sp0); 95 load_sp0(task->thread.sp0);
95#else 96#else
96 load_sp0(task_top_of_stack(task)); 97 if (static_cpu_has(X86_FEATURE_XENPV))
98 load_sp0(task_top_of_stack(task));
97#endif 99#endif
98} 100}
99 101
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 70f425947dc5..00223333821a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
207#else /* !__ASSEMBLY__ */ 207#else /* !__ASSEMBLY__ */
208 208
209#ifdef CONFIG_X86_64 209#ifdef CONFIG_X86_64
210# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) 210# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
211#endif 211#endif
212 212
213#endif 213#endif
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 1fadd310ff68..31051f35cbb7 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
75dotraplinkage void do_stack_segment(struct pt_regs *, long); 75dotraplinkage void do_stack_segment(struct pt_regs *, long);
76#ifdef CONFIG_X86_64 76#ifdef CONFIG_X86_64
77dotraplinkage void do_double_fault(struct pt_regs *, long); 77dotraplinkage void do_double_fault(struct pt_regs *, long);
78asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
79#endif 78#endif
80dotraplinkage void do_general_protection(struct pt_regs *, long); 79dotraplinkage void do_general_protection(struct pt_regs *, long);
81dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); 80dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index e9cc6fe1fc6f..c1688c2d0a12 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -7,6 +7,9 @@
7#include <asm/ptrace.h> 7#include <asm/ptrace.h>
8#include <asm/stacktrace.h> 8#include <asm/stacktrace.h>
9 9
10#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
11#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
12
10struct unwind_state { 13struct unwind_state {
11 struct stack_info stack_info; 14 struct stack_info stack_info;
12 unsigned long stack_mask; 15 unsigned long stack_mask;
@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
52} 55}
53 56
54#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) 57#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
58/*
59 * WARNING: The entire pt_regs may not be safe to dereference. In some cases,
60 * only the iret frame registers are accessible. Use with caution!
61 */
55static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) 62static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
56{ 63{
57 if (unwind_done(state)) 64 if (unwind_done(state))
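
As a side note on the IRET_FRAME_OFFSET/IRET_FRAME_SIZE constants added above: they describe the hardware iret frame as the last five 8-byte fields of struct pt_regs, starting at ip. A minimal standalone sketch of that relationship (the struct below is a made-up stand-in, not the kernel's real pt_regs):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical mirror of the shape of struct pt_regs on x86_64. */
struct fake_pt_regs {
	uint64_t r15, r14, r13, r12, bp, bx, r11, r10, r9, r8;
	uint64_t ax, cx, dx, si, di, orig_ax;
	uint64_t ip, cs, flags, sp, ss;		/* the hardware iret frame */
};

#define IRET_FRAME_OFFSET  (offsetof(struct fake_pt_regs, ip))
#define IRET_FRAME_SIZE    (sizeof(struct fake_pt_regs) - IRET_FRAME_OFFSET)

_Static_assert(IRET_FRAME_SIZE == 5 * 8,
	       "iret frame is ip, cs, flags, sp, ss: five 8-byte slots");

int main(void)
{
	printf("IRET_FRAME_OFFSET = %zu, IRET_FRAME_SIZE = %zu\n",
	       IRET_FRAME_OFFSET, IRET_FRAME_SIZE);
	return 0;
}
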
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 8ea78275480d..cd360a5e0dca 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -93,4 +93,10 @@ void common(void) {
93 93
94 BLANK(); 94 BLANK();
95 DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); 95 DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
96
97 /* Layout info for cpu_entry_area */
98 OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
99 OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
100 OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
101 DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
96} 102}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index dedf428b20b6..7d20d9c0b3d6 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -47,13 +47,8 @@ void foo(void)
47 BLANK(); 47 BLANK();
48 48
49 /* Offset from the sysenter stack to tss.sp0 */ 49 /* Offset from the sysenter stack to tss.sp0 */
50 DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - 50 DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
51 offsetofend(struct tss_struct, SYSENTER_stack)); 51 offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
52
53 /* Offset from cpu_tss to SYSENTER_stack */
54 OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
55 /* Size of SYSENTER_stack */
56 DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
57 52
58#ifdef CONFIG_CC_STACKPROTECTOR 53#ifdef CONFIG_CC_STACKPROTECTOR
59 BLANK(); 54 BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 630212fa9b9d..bf51e51d808d 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -23,6 +23,9 @@ int main(void)
23#ifdef CONFIG_PARAVIRT 23#ifdef CONFIG_PARAVIRT
24 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); 24 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
25 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); 25 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
26#ifdef CONFIG_DEBUG_ENTRY
27 OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
28#endif
26 BLANK(); 29 BLANK();
27#endif 30#endif
28 31
@@ -63,6 +66,7 @@ int main(void)
63 66
64 OFFSET(TSS_ist, tss_struct, x86_tss.ist); 67 OFFSET(TSS_ist, tss_struct, x86_tss.ist);
65 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); 68 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
69 OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
66 BLANK(); 70 BLANK();
67 71
68#ifdef CONFIG_CC_STACKPROTECTOR 72#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fa998ca8aa5a..7416da3ec4df 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
476 return NULL; /* Not found */ 476 return NULL; /* Not found */
477} 477}
478 478
479__u32 cpu_caps_cleared[NCAPINTS]; 479__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
480__u32 cpu_caps_set[NCAPINTS]; 480__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
481 481
482void load_percpu_segment(int cpu) 482void load_percpu_segment(int cpu)
483{ 483{
@@ -490,27 +490,116 @@ void load_percpu_segment(int cpu)
490 load_stack_canary_segment(); 490 load_stack_canary_segment();
491} 491}
492 492
493/* Setup the fixmap mapping only once per-processor */ 493#ifdef CONFIG_X86_32
494static inline void setup_fixmap_gdt(int cpu) 494/* The 32-bit entry code needs to find cpu_entry_area. */
495DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
496#endif
497
498#ifdef CONFIG_X86_64
499/*
500 * Special IST stacks which the CPU switches to when it calls
501 * an IST-marked descriptor entry. Up to 7 stacks (hardware
502 * limit), all of them are 4K, except the debug stack which
503 * is 8K.
504 */
505static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
506 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
507 [DEBUG_STACK - 1] = DEBUG_STKSZ
508};
509
510static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
511 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
512#endif
513
514static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
515 SYSENTER_stack_storage);
516
517static void __init
518set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
519{
520 for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
521 __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
522}
523
524/* Setup the fixmap mappings only once per-processor */
525static void __init setup_cpu_entry_area(int cpu)
495{ 526{
496#ifdef CONFIG_X86_64 527#ifdef CONFIG_X86_64
497 /* On 64-bit systems, we use a read-only fixmap GDT. */ 528 extern char _entry_trampoline[];
498 pgprot_t prot = PAGE_KERNEL_RO; 529
530 /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
531 pgprot_t gdt_prot = PAGE_KERNEL_RO;
532 pgprot_t tss_prot = PAGE_KERNEL_RO;
499#else 533#else
500 /* 534 /*
501 * On native 32-bit systems, the GDT cannot be read-only because 535 * On native 32-bit systems, the GDT cannot be read-only because
502 * our double fault handler uses a task gate, and entering through 536 * our double fault handler uses a task gate, and entering through
503 * a task gate needs to change an available TSS to busy. If the GDT 537 * a task gate needs to change an available TSS to busy. If the
504 * is read-only, that will triple fault. 538 * GDT is read-only, that will triple fault. The TSS cannot be
539 * read-only because the CPU writes to it on task switches.
505 * 540 *
506 * On Xen PV, the GDT must be read-only because the hypervisor requires 541 * On Xen PV, the GDT must be read-only because the hypervisor
507 * it. 542 * requires it.
508 */ 543 */
509 pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ? 544 pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
510 PAGE_KERNEL_RO : PAGE_KERNEL; 545 PAGE_KERNEL_RO : PAGE_KERNEL;
546 pgprot_t tss_prot = PAGE_KERNEL;
511#endif 547#endif
512 548
513 __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot); 549 __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
550 set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
551 per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
552 PAGE_KERNEL);
553
554 /*
555 * The Intel SDM says (Volume 3, 7.2.1):
556 *
557 * Avoid placing a page boundary in the part of the TSS that the
558 * processor reads during a task switch (the first 104 bytes). The
559 * processor may not correctly perform address translations if a
560 * boundary occurs in this area. During a task switch, the processor
561 * reads and writes into the first 104 bytes of each TSS (using
562 * contiguous physical addresses beginning with the physical address
563 * of the first byte of the TSS). So, after TSS access begins, if
564 * part of the 104 bytes is not physically contiguous, the processor
565 * will access incorrect information without generating a page-fault
566 * exception.
567 *
568 * There are also a lot of errata involving the TSS spanning a page
569 * boundary. Assert that we're not doing that.
570 */
571 BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
572 offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
573 BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
574 set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
575 &per_cpu(cpu_tss_rw, cpu),
576 sizeof(struct tss_struct) / PAGE_SIZE,
577 tss_prot);
578
579#ifdef CONFIG_X86_32
580 per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
581#endif
582
583#ifdef CONFIG_X86_64
584 BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
585 BUILD_BUG_ON(sizeof(exception_stacks) !=
586 sizeof(((struct cpu_entry_area *)0)->exception_stacks));
587 set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
588 &per_cpu(exception_stacks, cpu),
589 sizeof(exception_stacks) / PAGE_SIZE,
590 PAGE_KERNEL);
591
592 __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
593 __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
594#endif
595}
596
597void __init setup_cpu_entry_areas(void)
598{
599 unsigned int cpu;
600
601 for_each_possible_cpu(cpu)
602 setup_cpu_entry_area(cpu);
514} 603}
515 604
516/* Load the original GDT from the per-cpu structure */ 605/* Load the original GDT from the per-cpu structure */
@@ -747,7 +836,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
747{ 836{
748 int i; 837 int i;
749 838
750 for (i = 0; i < NCAPINTS; i++) { 839 for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
751 c->x86_capability[i] &= ~cpu_caps_cleared[i]; 840 c->x86_capability[i] &= ~cpu_caps_cleared[i];
752 c->x86_capability[i] |= cpu_caps_set[i]; 841 c->x86_capability[i] |= cpu_caps_set[i];
753 } 842 }
@@ -1250,7 +1339,7 @@ void enable_sep_cpu(void)
1250 return; 1339 return;
1251 1340
1252 cpu = get_cpu(); 1341 cpu = get_cpu();
1253 tss = &per_cpu(cpu_tss, cpu); 1342 tss = &per_cpu(cpu_tss_rw, cpu);
1254 1343
1255 /* 1344 /*
1256 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- 1345 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1348,7 @@ void enable_sep_cpu(void)
1259 1348
1260 tss->x86_tss.ss1 = __KERNEL_CS; 1349 tss->x86_tss.ss1 = __KERNEL_CS;
1261 wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); 1350 wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
1262 1351 wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
1263 wrmsr(MSR_IA32_SYSENTER_ESP,
1264 (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
1265 0);
1266
1267 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); 1352 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
1268 1353
1269 put_cpu(); 1354 put_cpu();
@@ -1357,25 +1442,19 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
1357DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; 1442DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
1358EXPORT_PER_CPU_SYMBOL(__preempt_count); 1443EXPORT_PER_CPU_SYMBOL(__preempt_count);
1359 1444
1360/*
1361 * Special IST stacks which the CPU switches to when it calls
1362 * an IST-marked descriptor entry. Up to 7 stacks (hardware
1363 * limit), all of them are 4K, except the debug stack which
1364 * is 8K.
1365 */
1366static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
1367 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1368 [DEBUG_STACK - 1] = DEBUG_STKSZ
1369};
1370
1371static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
1372 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
1373
1374/* May not be marked __init: used by software suspend */ 1445/* May not be marked __init: used by software suspend */
1375void syscall_init(void) 1446void syscall_init(void)
1376{ 1447{
1448 extern char _entry_trampoline[];
1449 extern char entry_SYSCALL_64_trampoline[];
1450
1451 int cpu = smp_processor_id();
1452 unsigned long SYSCALL64_entry_trampoline =
1453 (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
1454 (entry_SYSCALL_64_trampoline - _entry_trampoline);
1455
1377 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); 1456 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
1378 wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); 1457 wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
1379 1458
1380#ifdef CONFIG_IA32_EMULATION 1459#ifdef CONFIG_IA32_EMULATION
1381 wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); 1460 wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1465,7 @@ void syscall_init(void)
1386 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). 1465 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
1387 */ 1466 */
1388 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); 1467 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
1389 wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); 1468 wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
1390 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); 1469 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
1391#else 1470#else
1392 wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); 1471 wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1609,7 @@ void cpu_init(void)
1530 if (cpu) 1609 if (cpu)
1531 load_ucode_ap(); 1610 load_ucode_ap();
1532 1611
1533 t = &per_cpu(cpu_tss, cpu); 1612 t = &per_cpu(cpu_tss_rw, cpu);
1534 oist = &per_cpu(orig_ist, cpu); 1613 oist = &per_cpu(orig_ist, cpu);
1535 1614
1536#ifdef CONFIG_NUMA 1615#ifdef CONFIG_NUMA
@@ -1569,7 +1648,7 @@ void cpu_init(void)
1569 * set up and load the per-CPU TSS 1648 * set up and load the per-CPU TSS
1570 */ 1649 */
1571 if (!oist->ist[0]) { 1650 if (!oist->ist[0]) {
1572 char *estacks = per_cpu(exception_stacks, cpu); 1651 char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
1573 1652
1574 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1653 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1575 estacks += exception_stack_sizes[v]; 1654 estacks += exception_stack_sizes[v];
@@ -1580,7 +1659,7 @@ void cpu_init(void)
1580 } 1659 }
1581 } 1660 }
1582 1661
1583 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1662 t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
1584 1663
1585 /* 1664 /*
1586 * <= is required because the CPU will access up to 1665 * <= is required because the CPU will access up to
@@ -1596,11 +1675,12 @@ void cpu_init(void)
1596 enter_lazy_tlb(&init_mm, me); 1675 enter_lazy_tlb(&init_mm, me);
1597 1676
1598 /* 1677 /*
1599 * Initialize the TSS. Don't bother initializing sp0, as the initial 1678 * Initialize the TSS. sp0 points to the entry trampoline stack
1600 * task never enters user mode. 1679 * regardless of what task is running.
1601 */ 1680 */
1602 set_tss_desc(cpu, t); 1681 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1603 load_TR_desc(); 1682 load_TR_desc();
1683 load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
1604 1684
1605 load_mm_ldt(&init_mm); 1685 load_mm_ldt(&init_mm);
1606 1686
@@ -1612,7 +1692,6 @@ void cpu_init(void)
1612 if (is_uv_system()) 1692 if (is_uv_system())
1613 uv_cpu_init(); 1693 uv_cpu_init();
1614 1694
1615 setup_fixmap_gdt(cpu);
1616 load_fixmap_gdt(cpu); 1695 load_fixmap_gdt(cpu);
1617} 1696}
1618 1697
@@ -1622,7 +1701,7 @@ void cpu_init(void)
1622{ 1701{
1623 int cpu = smp_processor_id(); 1702 int cpu = smp_processor_id();
1624 struct task_struct *curr = current; 1703 struct task_struct *curr = current;
1625 struct tss_struct *t = &per_cpu(cpu_tss, cpu); 1704 struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
1626 1705
1627 wait_for_master_cpu(cpu); 1706 wait_for_master_cpu(cpu);
1628 1707
@@ -1657,12 +1736,12 @@ void cpu_init(void)
1657 * Initialize the TSS. Don't bother initializing sp0, as the initial 1736 * Initialize the TSS. Don't bother initializing sp0, as the initial
1658 * task never enters user mode. 1737 * task never enters user mode.
1659 */ 1738 */
1660 set_tss_desc(cpu, t); 1739 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1661 load_TR_desc(); 1740 load_TR_desc();
1662 1741
1663 load_mm_ldt(&init_mm); 1742 load_mm_ldt(&init_mm);
1664 1743
1665 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1744 t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
1666 1745
1667#ifdef CONFIG_DOUBLEFAULT 1746#ifdef CONFIG_DOUBLEFAULT
1668 /* Set up doublefault TSS pointer in the GDT */ 1747 /* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1753,6 @@ void cpu_init(void)
1674 1753
1675 fpu__init_cpu(); 1754 fpu__init_cpu();
1676 1755
1677 setup_fixmap_gdt(cpu);
1678 load_fixmap_gdt(cpu); 1756 load_fixmap_gdt(cpu);
1679} 1757}
1680#endif 1758#endif
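
For reference, the BUILD_BUG_ON reasoning in setup_cpu_entry_area() above can be restated in a self-contained sketch (the layout numbers below are invented for illustration, not the kernel's): the start and end offsets of the hardware TSS differ in a bit at or above the page-size bit exactly when they fall in different pages, so masking their XOR with PAGE_MASK detects a straddled boundary.

#include <stddef.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* Hypothetical layout -- sizes chosen only so the checks below pass. */
struct hw_tss   { char bytes[104]; };
struct fake_tss { char before[256]; struct hw_tss x86_tss; char after[3736]; };

#define offsetofend(t, f) (offsetof(t, f) + sizeof(((t *)0)->f))

/* Start and end of x86_tss must land on the same 4K page. */
_Static_assert(((offsetof(struct fake_tss, x86_tss) ^
		 offsetofend(struct fake_tss, x86_tss)) & PAGE_MASK) == 0,
	       "hardware TSS must not span a page boundary");
/* And the containing structure is padded to a whole number of pages. */
_Static_assert(sizeof(struct fake_tss) % PAGE_SIZE == 0,
	       "fake_tss is a whole number of pages");

int main(void) { return 0; }	/* nothing to run; the checks are compile-time */
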
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index 0e662c55ae90..0b8cedb20d6d 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
50 cpu_relax(); 50 cpu_relax();
51} 51}
52 52
53struct tss_struct doublefault_tss __cacheline_aligned = { 53struct x86_hw_tss doublefault_tss __cacheline_aligned = {
54 .x86_tss = { 54 .sp0 = STACK_START,
55 .sp0 = STACK_START, 55 .ss0 = __KERNEL_DS,
56 .ss0 = __KERNEL_DS, 56 .ldt = 0,
57 .ldt = 0, 57 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
58 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, 58
59 59 .ip = (unsigned long) doublefault_fn,
60 .ip = (unsigned long) doublefault_fn, 60 /* 0x2 bit is always set */
61 /* 0x2 bit is always set */ 61 .flags = X86_EFLAGS_SF | 0x2,
62 .flags = X86_EFLAGS_SF | 0x2, 62 .sp = STACK_START,
63 .sp = STACK_START, 63 .es = __USER_DS,
64 .es = __USER_DS, 64 .cs = __KERNEL_CS,
65 .cs = __KERNEL_CS, 65 .ss = __KERNEL_DS,
66 .ss = __KERNEL_DS, 66 .ds = __USER_DS,
67 .ds = __USER_DS, 67 .fs = __KERNEL_PERCPU,
68 .fs = __KERNEL_PERCPU, 68
69 69 .__cr3 = __pa_nodebug(swapper_pg_dir),
70 .__cr3 = __pa_nodebug(swapper_pg_dir),
71 }
72}; 70};
73 71
74/* dummy for do_double_fault() call */ 72/* dummy for do_double_fault() call */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index f13b4c00a5de..bbd6d986e2d0 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -43,6 +43,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
43 return true; 43 return true;
44} 44}
45 45
46bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
47{
48 struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
49
50 void *begin = ss;
51 void *end = ss + 1;
52
53 if ((void *)stack < begin || (void *)stack >= end)
54 return false;
55
56 info->type = STACK_TYPE_SYSENTER;
57 info->begin = begin;
58 info->end = end;
59 info->next_sp = NULL;
60
61 return true;
62}
63
46static void printk_stack_address(unsigned long address, int reliable, 64static void printk_stack_address(unsigned long address, int reliable,
47 char *log_lvl) 65 char *log_lvl)
48{ 66{
@@ -50,6 +68,28 @@ static void printk_stack_address(unsigned long address, int reliable,
50 printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); 68 printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
51} 69}
52 70
71void show_iret_regs(struct pt_regs *regs)
72{
73 printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
74 printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
75 regs->sp, regs->flags);
76}
77
78static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
79{
80 if (on_stack(info, regs, sizeof(*regs)))
81 __show_regs(regs, 0);
82 else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
83 IRET_FRAME_SIZE)) {
84 /*
85 * When an interrupt or exception occurs in entry code, the
86 * full pt_regs might not have been saved yet. In that case
87 * just print the iret frame.
88 */
89 show_iret_regs(regs);
90 }
91}
92
53void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 93void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
54 unsigned long *stack, char *log_lvl) 94 unsigned long *stack, char *log_lvl)
55{ 95{
@@ -71,31 +111,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
71 * - task stack 111 * - task stack
72 * - interrupt stack 112 * - interrupt stack
73 * - HW exception stacks (double fault, nmi, debug, mce) 113 * - HW exception stacks (double fault, nmi, debug, mce)
114 * - SYSENTER stack
74 * 115 *
75 * x86-32 can have up to three stacks: 116 * x86-32 can have up to four stacks:
76 * - task stack 117 * - task stack
77 * - softirq stack 118 * - softirq stack
78 * - hardirq stack 119 * - hardirq stack
120 * - SYSENTER stack
79 */ 121 */
80 for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { 122 for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
81 const char *stack_name; 123 const char *stack_name;
82 124
83 /* 125 if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
84 * If we overflowed the task stack into a guard page, jump back 126 /*
85 * to the bottom of the usable stack. 127 * We weren't on a valid stack. It's possible that
86 */ 128 * we overflowed a valid stack into a guard page.
87 if (task_stack_page(task) - (void *)stack < PAGE_SIZE) 129 * See if the next page up is valid so that we can
88 stack = task_stack_page(task); 130 * generate some kind of backtrace if this happens.
89 131 */
90 if (get_stack_info(stack, task, &stack_info, &visit_mask)) 132 stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
91 break; 133 if (get_stack_info(stack, task, &stack_info, &visit_mask))
134 break;
135 }
92 136
93 stack_name = stack_type_name(stack_info.type); 137 stack_name = stack_type_name(stack_info.type);
94 if (stack_name) 138 if (stack_name)
95 printk("%s <%s>\n", log_lvl, stack_name); 139 printk("%s <%s>\n", log_lvl, stack_name);
96 140
97 if (regs && on_stack(&stack_info, regs, sizeof(*regs))) 141 if (regs)
98 __show_regs(regs, 0); 142 show_regs_safe(&stack_info, regs);
99 143
100 /* 144 /*
101 * Scan the stack, printing any text addresses we find. At the 145 * Scan the stack, printing any text addresses we find. At the
@@ -119,7 +163,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
119 163
120 /* 164 /*
121 * Don't print regs->ip again if it was already printed 165 * Don't print regs->ip again if it was already printed
122 * by __show_regs() below. 166 * by show_regs_safe() below.
123 */ 167 */
124 if (regs && stack == &regs->ip) 168 if (regs && stack == &regs->ip)
125 goto next; 169 goto next;
@@ -155,8 +199,8 @@ next:
155 199
156 /* if the frame has entry regs, print them */ 200 /* if the frame has entry regs, print them */
157 regs = unwind_get_entry_regs(&state); 201 regs = unwind_get_entry_regs(&state);
158 if (regs && on_stack(&stack_info, regs, sizeof(*regs))) 202 if (regs)
159 __show_regs(regs, 0); 203 show_regs_safe(&stack_info, regs);
160 } 204 }
161 205
162 if (stack_name) 206 if (stack_name)
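
A tiny sketch (assuming 4K pages; not kernel code) of the PAGE_ALIGN hop used in show_trace_log_lvl() above: when a stack pointer has strayed into a guard page, rounding it up to the next page boundary lands on the first address of the following page, which may be the valid stack that was overflowed.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE  4096UL
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	/* Hypothetical stack pointer that has run into a guard page. */
	uintptr_t sp = 0xffffc90000123abcUL;

	printf("sp        = %#lx\n", (unsigned long)sp);
	printf("next page = %#lx\n", (unsigned long)PAGE_ALIGN(sp));
	/* prints ...123abc and ...124000 */
	return 0;
}
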
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index daefae83a3aa..5ff13a6b3680 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
26 if (type == STACK_TYPE_SOFTIRQ) 26 if (type == STACK_TYPE_SOFTIRQ)
27 return "SOFTIRQ"; 27 return "SOFTIRQ";
28 28
29 if (type == STACK_TYPE_SYSENTER)
30 return "SYSENTER";
31
29 return NULL; 32 return NULL;
30} 33}
31 34
@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
93 if (task != current) 96 if (task != current)
94 goto unknown; 97 goto unknown;
95 98
99 if (in_sysenter_stack(stack, info))
100 goto recursion_check;
101
96 if (in_hardirq_stack(stack, info)) 102 if (in_hardirq_stack(stack, info))
97 goto recursion_check; 103 goto recursion_check;
98 104
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 88ce2ffdb110..abc828f8c297 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -37,6 +37,9 @@ const char *stack_type_name(enum stack_type type)
37 if (type == STACK_TYPE_IRQ) 37 if (type == STACK_TYPE_IRQ)
38 return "IRQ"; 38 return "IRQ";
39 39
40 if (type == STACK_TYPE_SYSENTER)
41 return "SYSENTER";
42
40 if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) 43 if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
41 return exception_stack_names[type - STACK_TYPE_EXCEPTION]; 44 return exception_stack_names[type - STACK_TYPE_EXCEPTION];
42 45
@@ -115,6 +118,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
115 if (in_irq_stack(stack, info)) 118 if (in_irq_stack(stack, info))
116 goto recursion_check; 119 goto recursion_check;
117 120
121 if (in_sysenter_stack(stack, info))
122 goto recursion_check;
123
118 goto unknown; 124 goto unknown;
119 125
120recursion_check: 126recursion_check:
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 3feb648781c4..2f723301eb58 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
67 * because the ->io_bitmap_max value must match the bitmap 67 * because the ->io_bitmap_max value must match the bitmap
68 * contents: 68 * contents:
69 */ 69 */
70 tss = &per_cpu(cpu_tss, get_cpu()); 70 tss = &per_cpu(cpu_tss_rw, get_cpu());
71 71
72 if (turn_on) 72 if (turn_on)
73 bitmap_clear(t->io_bitmap_ptr, from, num); 73 bitmap_clear(t->io_bitmap_ptr, from, num);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 49cfd9fe7589..68e1867cca80 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
219 /* high bit used in ret_from_ code */ 219 /* high bit used in ret_from_ code */
220 unsigned vector = ~regs->orig_ax; 220 unsigned vector = ~regs->orig_ax;
221 221
222 /*
223 * NB: Unlike exception entries, IRQ entries do not reliably
224 * handle context tracking in the low-level entry code. This is
225 * because syscall entries execute briefly with IRQs on before
226 * updating context tracking state, so we can take an IRQ from
227 * kernel mode with CONTEXT_USER. The low-level entry code only
228 * updates the context if we came from user mode, so we won't
229 * switch to CONTEXT_KERNEL. We'll fix that once the syscall
230 * code is cleaned up enough that we can cleanly defer enabling
231 * IRQs.
232 */
233
234 entering_irq(); 222 entering_irq();
235 223
236 /* entering_irq() tells RCU that we're not quiescent. Check it. */ 224 /* entering_irq() tells RCU that we're not quiescent. Check it. */
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 020efbf5786b..d86e344f5b3d 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
57 if (regs->sp >= estack_top && regs->sp <= estack_bottom) 57 if (regs->sp >= estack_top && regs->sp <= estack_bottom)
58 return; 58 return;
59 59
60 WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", 60 WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
61 current->comm, curbase, regs->sp, 61 current->comm, curbase, regs->sp,
62 irq_stack_top, irq_stack_bottom, 62 irq_stack_top, irq_stack_bottom,
63 estack_top, estack_bottom); 63 estack_top, estack_bottom, (void *)regs->ip);
64 64
65 if (sysctl_panic_on_stackoverflow) 65 if (sysctl_panic_on_stackoverflow)
66 panic("low stack detected by irq handler - check messages\n"); 66 panic("low stack detected by irq handler - check messages\n");
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index ac0be8283325..9edadabf04f6 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
10DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); 10DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
11DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); 11DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
12DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); 12DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
13DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
14DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); 13DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
15 14
16DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); 15DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
60 PATCH_SITE(pv_mmu_ops, read_cr2); 59 PATCH_SITE(pv_mmu_ops, read_cr2);
61 PATCH_SITE(pv_mmu_ops, read_cr3); 60 PATCH_SITE(pv_mmu_ops, read_cr3);
62 PATCH_SITE(pv_mmu_ops, write_cr3); 61 PATCH_SITE(pv_mmu_ops, write_cr3);
63 PATCH_SITE(pv_mmu_ops, flush_tlb_single);
64 PATCH_SITE(pv_cpu_ops, wbinvd); 62 PATCH_SITE(pv_cpu_ops, wbinvd);
65#if defined(CONFIG_PARAVIRT_SPINLOCKS) 63#if defined(CONFIG_PARAVIRT_SPINLOCKS)
66 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): 64 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index bb988a24db92..aed9d94bd46f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -47,7 +47,7 @@
47 * section. Since TSS's are completely CPU-local, we want them 47 * section. Since TSS's are completely CPU-local, we want them
48 * on exact cacheline boundaries, to eliminate cacheline ping-pong. 48 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
49 */ 49 */
50__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { 50__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
51 .x86_tss = { 51 .x86_tss = {
52 /* 52 /*
53 * .sp0 is only used when entering ring 0 from a lower 53 * .sp0 is only used when entering ring 0 from a lower
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
56 * Poison it. 56 * Poison it.
57 */ 57 */
58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, 58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
59
60#ifdef CONFIG_X86_64
61 /*
62 * .sp1 is cpu_current_top_of_stack. The init task never
63 * runs user code, but cpu_current_top_of_stack should still
64 * be well defined before the first context switch.
65 */
66 .sp1 = TOP_OF_INIT_STACK,
67#endif
68
59#ifdef CONFIG_X86_32 69#ifdef CONFIG_X86_32
60 .ss0 = __KERNEL_DS, 70 .ss0 = __KERNEL_DS,
61 .ss1 = __KERNEL_CS, 71 .ss1 = __KERNEL_CS,
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
71 */ 81 */
72 .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, 82 .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
73#endif 83#endif
74#ifdef CONFIG_X86_32
75 .SYSENTER_stack_canary = STACK_END_MAGIC,
76#endif
77}; 84};
78EXPORT_PER_CPU_SYMBOL(cpu_tss); 85EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
79 86
80DEFINE_PER_CPU(bool, __tss_limit_invalid); 87DEFINE_PER_CPU(bool, __tss_limit_invalid);
81EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); 88EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
104 struct fpu *fpu = &t->fpu; 111 struct fpu *fpu = &t->fpu;
105 112
106 if (bp) { 113 if (bp) {
107 struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); 114 struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
108 115
109 t->io_bitmap_ptr = NULL; 116 t->io_bitmap_ptr = NULL;
110 clear_thread_flag(TIF_IO_BITMAP); 117 clear_thread_flag(TIF_IO_BITMAP);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 45bf0c5f93e1..5224c6099184 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
234 struct fpu *prev_fpu = &prev->fpu; 234 struct fpu *prev_fpu = &prev->fpu;
235 struct fpu *next_fpu = &next->fpu; 235 struct fpu *next_fpu = &next->fpu;
236 int cpu = smp_processor_id(); 236 int cpu = smp_processor_id();
237 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 237 struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
238 238
239 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ 239 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
240 240
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eeeb34f85c25..c75466232016 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
69 unsigned int fsindex, gsindex; 69 unsigned int fsindex, gsindex;
70 unsigned int ds, cs, es; 70 unsigned int ds, cs, es;
71 71
72 printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip); 72 show_iret_regs(regs);
73 printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss, 73
74 regs->sp, regs->flags);
75 if (regs->orig_ax != -1) 74 if (regs->orig_ax != -1)
76 pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); 75 pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
77 else 76 else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
88 printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", 87 printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
89 regs->r13, regs->r14, regs->r15); 88 regs->r13, regs->r14, regs->r15);
90 89
90 if (!all)
91 return;
92
91 asm("movl %%ds,%0" : "=r" (ds)); 93 asm("movl %%ds,%0" : "=r" (ds));
92 asm("movl %%cs,%0" : "=r" (cs)); 94 asm("movl %%cs,%0" : "=r" (cs));
93 asm("movl %%es,%0" : "=r" (es)); 95 asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
98 rdmsrl(MSR_GS_BASE, gs); 100 rdmsrl(MSR_GS_BASE, gs);
99 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 101 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
100 102
101 if (!all)
102 return;
103
104 cr0 = read_cr0(); 103 cr0 = read_cr0();
105 cr2 = read_cr2(); 104 cr2 = read_cr2();
106 cr3 = __read_cr3(); 105 cr3 = __read_cr3();
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
400 struct fpu *prev_fpu = &prev->fpu; 399 struct fpu *prev_fpu = &prev->fpu;
401 struct fpu *next_fpu = &next->fpu; 400 struct fpu *next_fpu = &next->fpu;
402 int cpu = smp_processor_id(); 401 int cpu = smp_processor_id();
403 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 402 struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
404 403
405 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && 404 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
406 this_cpu_read(irq_count) != -1); 405 this_cpu_read(irq_count) != -1);
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
462 * Switch the PDA and FPU contexts. 461 * Switch the PDA and FPU contexts.
463 */ 462 */
464 this_cpu_write(current_task, next_p); 463 this_cpu_write(current_task, next_p);
464 this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
465 465
466 /* Reload sp0. */ 466 /* Reload sp0. */
467 update_sp0(next_p); 467 update_sp0(next_p);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 989514c94a55..e98f8b66a460 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -348,9 +348,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
348 348
349 /* 349 /*
350 * If IRET takes a non-IST fault on the espfix64 stack, then we 350 * If IRET takes a non-IST fault on the espfix64 stack, then we
351 * end up promoting it to a doublefault. In that case, modify 351 * end up promoting it to a doublefault. In that case, take
352 * the stack to make it look like we just entered the #GP 352 * advantage of the fact that we're not using the normal (TSS.sp0)
353 * handler from user space, similar to bad_iret. 353 * stack right now. We can write a fake #GP(0) frame at TSS.sp0
354 * and then modify our own IRET frame so that, when we return,
355 * we land directly at the #GP(0) vector with the stack already
356 * set up according to its expectations.
357 *
358 * The net result is that our #GP handler will think that we
359 * entered from usermode with the bad user context.
354 * 360 *
355 * No need for ist_enter here because we don't use RCU. 361 * No need for ist_enter here because we don't use RCU.
356 */ 362 */
@@ -358,13 +364,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
358 regs->cs == __KERNEL_CS && 364 regs->cs == __KERNEL_CS &&
359 regs->ip == (unsigned long)native_irq_return_iret) 365 regs->ip == (unsigned long)native_irq_return_iret)
360 { 366 {
361 struct pt_regs *normal_regs = task_pt_regs(current); 367 struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
362 368
363 /* Fake a #GP(0) from userspace. */ 369 /*
364 memmove(&normal_regs->ip, (void *)regs->sp, 5*8); 370 * regs->sp points to the failing IRET frame on the
365 normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ 371 * ESPFIX64 stack. Copy it to the entry stack. This fills
372 * in gpregs->ss through gpregs->ip.
373 *
374 */
375 memmove(&gpregs->ip, (void *)regs->sp, 5*8);
376 gpregs->orig_ax = 0; /* Missing (lost) #GP error code */
377
378 /*
379 * Adjust our frame so that we return straight to the #GP
380 * vector with the expected RSP value. This is safe because
381 * we won't enable interrupts or schedule before we invoke
382 * general_protection, so nothing will clobber the stack
383 * frame we just set up.
384 */
366 regs->ip = (unsigned long)general_protection; 385 regs->ip = (unsigned long)general_protection;
367 regs->sp = (unsigned long)&normal_regs->orig_ax; 386 regs->sp = (unsigned long)&gpregs->orig_ax;
368 387
369 return; 388 return;
370 } 389 }
@@ -389,7 +408,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
389 * 408 *
390 * Processors update CR2 whenever a page fault is detected. If a 409 * Processors update CR2 whenever a page fault is detected. If a
391 * second page fault occurs while an earlier page fault is being 410 * second page fault occurs while an earlier page fault is being
392 * deliv- ered, the faulting linear address of the second fault will 411 * delivered, the faulting linear address of the second fault will
393 * overwrite the contents of CR2 (replacing the previous 412 * overwrite the contents of CR2 (replacing the previous
394 * address). These updates to CR2 occur even if the page fault 413 * address). These updates to CR2 occur even if the page fault
395 * results in a double fault or occurs during the delivery of a 414 * results in a double fault or occurs during the delivery of a
@@ -605,14 +624,15 @@ NOKPROBE_SYMBOL(do_int3);
605 624
606#ifdef CONFIG_X86_64 625#ifdef CONFIG_X86_64
607/* 626/*
608 * Help handler running on IST stack to switch off the IST stack if the 627 * Help handler running on a per-cpu (IST or entry trampoline) stack
609 * interrupted code was in user mode. The actual stack switch is done in 628 * to switch to the normal thread stack if the interrupted code was in
610 * entry_64.S 629 * user mode. The actual stack switch is done in entry_64.S
611 */ 630 */
612asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) 631asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
613{ 632{
614 struct pt_regs *regs = task_pt_regs(current); 633 struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
615 *regs = *eregs; 634 if (regs != eregs)
635 *regs = *eregs;
616 return regs; 636 return regs;
617} 637}
618NOKPROBE_SYMBOL(sync_regs); 638NOKPROBE_SYMBOL(sync_regs);
@@ -628,13 +648,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
628 /* 648 /*
629 * This is called from entry_64.S early in handling a fault 649 * This is called from entry_64.S early in handling a fault
630 * caused by a bad iret to user mode. To handle the fault 650 * caused by a bad iret to user mode. To handle the fault
631 * correctly, we want move our stack frame to task_pt_regs 651 * correctly, we want to move our stack frame to where it would
632 * and we want to pretend that the exception came from the 652 * be had we entered directly on the entry stack (rather than
633 * iret target. 653 * just below the IRET frame) and we want to pretend that the
654 * exception came from the IRET target.
634 */ 655 */
635 struct bad_iret_stack *new_stack = 656 struct bad_iret_stack *new_stack =
636 container_of(task_pt_regs(current), 657 (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
637 struct bad_iret_stack, regs);
638 658
639 /* Copy the IRET target to the new stack. */ 659 /* Copy the IRET target to the new stack. */
640 memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); 660 memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
@@ -795,14 +815,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
795 debug_stack_usage_dec(); 815 debug_stack_usage_dec();
796 816
797exit: 817exit:
798#if defined(CONFIG_X86_32)
799 /*
800 * This is the most likely code path that involves non-trivial use
801 * of the SYSENTER stack. Check that we haven't overrun it.
802 */
803 WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
804 "Overran or corrupted SYSENTER stack\n");
805#endif
806 ist_exit(regs); 818 ist_exit(regs);
807} 819}
808NOKPROBE_SYMBOL(do_debug); 820NOKPROBE_SYMBOL(do_debug);
@@ -929,6 +941,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
929 941
930void __init trap_init(void) 942void __init trap_init(void)
931{ 943{
944 /* Init cpu_entry_area before IST entries are set up */
945 setup_cpu_entry_areas();
946
932 idt_setup_traps(); 947 idt_setup_traps();
933 948
934 /* 949 /*
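
A hedged sketch of the pointer arithmetic used in do_double_fault() and fixup_bad_iret() above: casting the top-of-stack address to a struct pointer and subtracting one reserves room for exactly one register frame at the very top of the entry stack (the struct below is a stand-in, not the kernel's pt_regs).

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct fake_pt_regs { uint64_t regs[21]; };	/* 21 * 8 = 168 bytes */

int main(void)
{
	uint64_t stack[512];
	/* sp0 conventionally points one past the top of the stack area. */
	void *sp0 = &stack[512];
	struct fake_pt_regs *frame = (struct fake_pt_regs *)sp0 - 1;

	printf("sp0   = %p\n", sp0);
	printf("frame = %p (%zu bytes below sp0)\n",
	       (void *)frame, (size_t)((char *)sp0 - (char *)frame));
	return 0;
}
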
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index a3f973b2c97a..be86a865087a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
253 return NULL; 253 return NULL;
254} 254}
255 255
256static bool stack_access_ok(struct unwind_state *state, unsigned long addr, 256static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
257 size_t len) 257 size_t len)
258{ 258{
259 struct stack_info *info = &state->stack_info; 259 struct stack_info *info = &state->stack_info;
260 void *addr = (void *)_addr;
260 261
261 /* 262 if (!on_stack(info, addr, len) &&
262 * If the address isn't on the current stack, switch to the next one. 263 (get_stack_info(addr, state->task, info, &state->stack_mask)))
263 * 264 return false;
264 * We may have to traverse multiple stacks to deal with the possibility
265 * that info->next_sp could point to an empty stack and the address
266 * could be on a subsequent stack.
267 */
268 while (!on_stack(info, (void *)addr, len))
269 if (get_stack_info(info->next_sp, state->task, info,
270 &state->stack_mask))
271 return false;
272 265
273 return true; 266 return true;
274} 267}
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
283 return true; 276 return true;
284} 277}
285 278
286#define REGS_SIZE (sizeof(struct pt_regs))
287#define SP_OFFSET (offsetof(struct pt_regs, sp))
288#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
289#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
290
291static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, 279static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
292 unsigned long *ip, unsigned long *sp, bool full) 280 unsigned long *ip, unsigned long *sp)
293{ 281{
294 size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE; 282 struct pt_regs *regs = (struct pt_regs *)addr;
295 size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
296 struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
297
298 if (IS_ENABLED(CONFIG_X86_64)) {
299 if (!stack_access_ok(state, addr, regs_size))
300 return false;
301 283
302 *ip = regs->ip; 284 /* x86-32 support will be more complicated due to the &regs->sp hack */
303 *sp = regs->sp; 285 BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
304 286
305 return true; 287 if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
306 }
307
308 if (!stack_access_ok(state, addr, sp_offset))
309 return false; 288 return false;
310 289
311 *ip = regs->ip; 290 *ip = regs->ip;
291 *sp = regs->sp;
292 return true;
293}
312 294
313 if (user_mode(regs)) { 295static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
314 if (!stack_access_ok(state, addr + sp_offset, 296 unsigned long *ip, unsigned long *sp)
315 REGS_SIZE - SP_OFFSET)) 297{
316 return false; 298 struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
317 299
318 *sp = regs->sp; 300 if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
319 } else 301 return false;
320 *sp = (unsigned long)&regs->sp;
321 302
303 *ip = regs->ip;
304 *sp = regs->sp;
322 return true; 305 return true;
323} 306}
324 307
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
327 unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; 310 unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
328 enum stack_type prev_type = state->stack_info.type; 311 enum stack_type prev_type = state->stack_info.type;
329 struct orc_entry *orc; 312 struct orc_entry *orc;
330 struct pt_regs *ptregs;
331 bool indirect = false; 313 bool indirect = false;
332 314
333 if (unwind_done(state)) 315 if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
435 break; 417 break;
436 418
437 case ORC_TYPE_REGS: 419 case ORC_TYPE_REGS:
438 if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { 420 if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
439 orc_warn("can't dereference registers at %p for ip %pB\n", 421 orc_warn("can't dereference registers at %p for ip %pB\n",
440 (void *)sp, (void *)orig_ip); 422 (void *)sp, (void *)orig_ip);
441 goto done; 423 goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
447 break; 429 break;
448 430
449 case ORC_TYPE_REGS_IRET: 431 case ORC_TYPE_REGS_IRET:
450 if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { 432 if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
451 orc_warn("can't dereference iret registers at %p for ip %pB\n", 433 orc_warn("can't dereference iret registers at %p for ip %pB\n",
452 (void *)sp, (void *)orig_ip); 434 (void *)sp, (void *)orig_ip);
453 goto done; 435 goto done;
454 } 436 }
455 437
456 ptregs = container_of((void *)sp, struct pt_regs, ip); 438 state->regs = (void *)sp - IRET_FRAME_OFFSET;
457 if ((unsigned long)ptregs >= prev_sp && 439 state->full_regs = false;
458 on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
459 state->regs = ptregs;
460 state->full_regs = false;
461 } else
462 state->regs = NULL;
463
464 state->signal = true; 440 state->signal = true;
465 break; 441 break;
466 442
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
553 } 529 }
554 530
555 if (get_stack_info((unsigned long *)state->sp, state->task, 531 if (get_stack_info((unsigned long *)state->sp, state->task,
556 &state->stack_info, &state->stack_mask)) 532 &state->stack_info, &state->stack_mask)) {
557 return; 533 /*
534 * We weren't on a valid stack. It's possible that
535 * we overflowed a valid stack into a guard page.
536 * See if the next page up is valid so that we can
537 * generate some kind of backtrace if this happens.
538 */
539 void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
540 if (get_stack_info(next_page, state->task, &state->stack_info,
541 &state->stack_mask))
542 return;
543 }
558 544
559 /* 545 /*
560 * The caller can provide the address of the first frame directly 546 * The caller can provide the address of the first frame directly
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a4009fb9be87..d2a8b5a24a44 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -107,6 +107,15 @@ SECTIONS
107 SOFTIRQENTRY_TEXT 107 SOFTIRQENTRY_TEXT
108 *(.fixup) 108 *(.fixup)
109 *(.gnu.warning) 109 *(.gnu.warning)
110
111#ifdef CONFIG_X86_64
112 . = ALIGN(PAGE_SIZE);
113 _entry_trampoline = .;
114 *(.entry_trampoline)
115 . = ALIGN(PAGE_SIZE);
116 ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
117#endif
118
110 /* End of text section */ 119 /* End of text section */
111 _etext = .; 120 _etext = .;
112 } :text = 0x9090 121 } :text = 0x9090
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index abe74f779f9d..b514b2b2845a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2390,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
2390} 2390}
2391 2391
2392static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, 2392static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2393 u64 cr0, u64 cr4) 2393 u64 cr0, u64 cr3, u64 cr4)
2394{ 2394{
2395 int bad; 2395 int bad;
2396 u64 pcid;
2397
2398 /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
2399 pcid = 0;
2400 if (cr4 & X86_CR4_PCIDE) {
2401 pcid = cr3 & 0xfff;
2402 cr3 &= ~0xfff;
2403 }
2404
2405 bad = ctxt->ops->set_cr(ctxt, 3, cr3);
2406 if (bad)
2407 return X86EMUL_UNHANDLEABLE;
2396 2408
2397 /* 2409 /*
2398 * First enable PAE, long mode needs it before CR0.PG = 1 is set. 2410 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2411,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2411 bad = ctxt->ops->set_cr(ctxt, 4, cr4); 2423 bad = ctxt->ops->set_cr(ctxt, 4, cr4);
2412 if (bad) 2424 if (bad)
2413 return X86EMUL_UNHANDLEABLE; 2425 return X86EMUL_UNHANDLEABLE;
2426 if (pcid) {
2427 bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
2428 if (bad)
2429 return X86EMUL_UNHANDLEABLE;
2430 }
2431
2414 } 2432 }
2415 2433
2416 return X86EMUL_CONTINUE; 2434 return X86EMUL_CONTINUE;
@@ -2421,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2421 struct desc_struct desc; 2439 struct desc_struct desc;
2422 struct desc_ptr dt; 2440 struct desc_ptr dt;
2423 u16 selector; 2441 u16 selector;
2424 u32 val, cr0, cr4; 2442 u32 val, cr0, cr3, cr4;
2425 int i; 2443 int i;
2426 2444
2427 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); 2445 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
2428 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8)); 2446 cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
2429 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; 2447 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
2430 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); 2448 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
2431 2449
@@ -2467,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2467 2485
2468 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); 2486 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
2469 2487
2470 return rsm_enter_protected_mode(ctxt, cr0, cr4); 2488 return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2471} 2489}
2472 2490
2473static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) 2491static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2474{ 2492{
2475 struct desc_struct desc; 2493 struct desc_struct desc;
2476 struct desc_ptr dt; 2494 struct desc_ptr dt;
2477 u64 val, cr0, cr4; 2495 u64 val, cr0, cr3, cr4;
2478 u32 base3; 2496 u32 base3;
2479 u16 selector; 2497 u16 selector;
2480 int i, r; 2498 int i, r;
@@ -2491,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2491 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); 2509 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2492 2510
2493 cr0 = GET_SMSTATE(u64, smbase, 0x7f58); 2511 cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
2494 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50)); 2512 cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
2495 cr4 = GET_SMSTATE(u64, smbase, 0x7f48); 2513 cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
2496 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); 2514 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
2497 val = GET_SMSTATE(u64, smbase, 0x7ed0); 2515 val = GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2519,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2519 dt.address = GET_SMSTATE(u64, smbase, 0x7e68); 2537 dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
2520 ctxt->ops->set_gdt(ctxt, &dt); 2538 ctxt->ops->set_gdt(ctxt, &dt);
2521 2539
2522 r = rsm_enter_protected_mode(ctxt, cr0, cr4); 2540 r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2523 if (r != X86EMUL_CONTINUE) 2541 if (r != X86EMUL_CONTINUE)
2524 return r; 2542 return r;
2525 2543
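
A small illustration (values are made up) of the CR3 handling added to rsm_enter_protected_mode() above: while CR4.PCIDE is still clear, CR3 must be loaded with bits 11:0 zeroed, and the saved PCID can only be put back once PCIDE has been set.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cr3  = 0x000000012345a00full;	/* hypothetical saved CR3 */
	uint64_t pcid = cr3 & 0xfff;		/* PCID lives in CR3[11:0] */
	uint64_t base = cr3 & ~0xfffull;	/* page-table base, PCID cleared */

	printf("load first : %#llx\n", (unsigned long long)base);
	printf("after PCIDE: %#llx\n", (unsigned long long)(base | pcid));
	return 0;
}
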
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e5e66e5c6640..c4deb1f34faa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
3395 spin_lock(&vcpu->kvm->mmu_lock); 3395 spin_lock(&vcpu->kvm->mmu_lock);
3396 if(make_mmu_pages_available(vcpu) < 0) { 3396 if(make_mmu_pages_available(vcpu) < 0) {
3397 spin_unlock(&vcpu->kvm->mmu_lock); 3397 spin_unlock(&vcpu->kvm->mmu_lock);
3398 return 1; 3398 return -ENOSPC;
3399 } 3399 }
3400 sp = kvm_mmu_get_page(vcpu, 0, 0, 3400 sp = kvm_mmu_get_page(vcpu, 0, 0,
3401 vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL); 3401 vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
3410 spin_lock(&vcpu->kvm->mmu_lock); 3410 spin_lock(&vcpu->kvm->mmu_lock);
3411 if (make_mmu_pages_available(vcpu) < 0) { 3411 if (make_mmu_pages_available(vcpu) < 0) {
3412 spin_unlock(&vcpu->kvm->mmu_lock); 3412 spin_unlock(&vcpu->kvm->mmu_lock);
3413 return 1; 3413 return -ENOSPC;
3414 } 3414 }
3415 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), 3415 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
3416 i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); 3416 i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3450 spin_lock(&vcpu->kvm->mmu_lock); 3450 spin_lock(&vcpu->kvm->mmu_lock);
3451 if (make_mmu_pages_available(vcpu) < 0) { 3451 if (make_mmu_pages_available(vcpu) < 0) {
3452 spin_unlock(&vcpu->kvm->mmu_lock); 3452 spin_unlock(&vcpu->kvm->mmu_lock);
3453 return 1; 3453 return -ENOSPC;
3454 } 3454 }
3455 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 3455 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
3456 vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL); 3456 vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3487 spin_lock(&vcpu->kvm->mmu_lock); 3487 spin_lock(&vcpu->kvm->mmu_lock);
3488 if (make_mmu_pages_available(vcpu) < 0) { 3488 if (make_mmu_pages_available(vcpu) < 0) {
3489 spin_unlock(&vcpu->kvm->mmu_lock); 3489 spin_unlock(&vcpu->kvm->mmu_lock);
3490 return 1; 3490 return -ENOSPC;
3491 } 3491 }
3492 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 3492 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
3493 0, ACC_ALL); 3493 0, ACC_ALL);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8eba631c4dbd..023afa0c8887 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2302,7 +2302,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2302 * processors. See 22.2.4. 2302 * processors. See 22.2.4.
2303 */ 2303 */
2304 vmcs_writel(HOST_TR_BASE, 2304 vmcs_writel(HOST_TR_BASE,
2305 (unsigned long)this_cpu_ptr(&cpu_tss)); 2305 (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
2306 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ 2306 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
2307 2307
2308 /* 2308 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index faf843c9b916..1cec2c62a0b0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4384,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
4384 addr, n, v)) 4384 addr, n, v))
4385 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) 4385 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
4386 break; 4386 break;
4387 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); 4387 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
4388 handled += n; 4388 handled += n;
4389 addr += n; 4389 addr += n;
4390 len -= n; 4390 len -= n;
@@ -4643,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
4643{ 4643{
4644 if (vcpu->mmio_read_completed) { 4644 if (vcpu->mmio_read_completed) {
4645 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, 4645 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4646 vcpu->mmio_fragments[0].gpa, *(u64 *)val); 4646 vcpu->mmio_fragments[0].gpa, val);
4647 vcpu->mmio_read_completed = 0; 4647 vcpu->mmio_read_completed = 0;
4648 return 1; 4648 return 1;
4649 } 4649 }
@@ -4665,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4665 4665
4666static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) 4666static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
4667{ 4667{
4668 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); 4668 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
4669 return vcpu_mmio_write(vcpu, gpa, bytes, val); 4669 return vcpu_mmio_write(vcpu, gpa, bytes, val);
4670} 4670}
4671 4671
4672static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, 4672static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4673 void *val, int bytes) 4673 void *val, int bytes)
4674{ 4674{
4675 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); 4675 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
4676 return X86EMUL_IO_NEEDED; 4676 return X86EMUL_IO_NEEDED;
4677} 4677}
4678 4678
@@ -7264,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
7264 7264
7265int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 7265int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7266{ 7266{
7267 struct fpu *fpu = &current->thread.fpu;
7268 int r; 7267 int r;
7269 7268
7270 fpu__initialize(fpu);
7271
7272 kvm_sigset_activate(vcpu); 7269 kvm_sigset_activate(vcpu);
7273 7270
7271 kvm_load_guest_fpu(vcpu);
7272
7274 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { 7273 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
7275 if (kvm_run->immediate_exit) { 7274 if (kvm_run->immediate_exit) {
7276 r = -EINTR; 7275 r = -EINTR;
@@ -7296,14 +7295,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7296 } 7295 }
7297 } 7296 }
7298 7297
7299 kvm_load_guest_fpu(vcpu);
7300
7301 if (unlikely(vcpu->arch.complete_userspace_io)) { 7298 if (unlikely(vcpu->arch.complete_userspace_io)) {
7302 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; 7299 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
7303 vcpu->arch.complete_userspace_io = NULL; 7300 vcpu->arch.complete_userspace_io = NULL;
7304 r = cui(vcpu); 7301 r = cui(vcpu);
7305 if (r <= 0) 7302 if (r <= 0)
7306 goto out_fpu; 7303 goto out;
7307 } else 7304 } else
7308 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); 7305 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
7309 7306
@@ -7312,9 +7309,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7312 else 7309 else
7313 r = vcpu_run(vcpu); 7310 r = vcpu_run(vcpu);
7314 7311
7315out_fpu:
7316 kvm_put_guest_fpu(vcpu);
7317out: 7312out:
7313 kvm_put_guest_fpu(vcpu);
7318 post_kvm_run_save(vcpu); 7314 post_kvm_run_save(vcpu);
7319 kvm_sigset_deactivate(vcpu); 7315 kvm_sigset_deactivate(vcpu);
7320 7316
@@ -7384,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
7384#endif 7380#endif
7385 7381
7386 kvm_rip_write(vcpu, regs->rip); 7382 kvm_rip_write(vcpu, regs->rip);
7387 kvm_set_rflags(vcpu, regs->rflags); 7383 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
7388 7384
7389 vcpu->arch.exception.pending = false; 7385 vcpu->arch.exception.pending = false;
7390 7386
@@ -7498,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
7498} 7494}
7499EXPORT_SYMBOL_GPL(kvm_task_switch); 7495EXPORT_SYMBOL_GPL(kvm_task_switch);
7500 7496
7497int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
7498{
7499 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) {
7500 /*
7501 * When EFER.LME and CR0.PG are set, the processor is in
7502 * 64-bit mode (though maybe in a 32-bit code segment).
7503 * CR4.PAE and EFER.LMA must be set.
7504 */
7505 if (!(sregs->cr4 & X86_CR4_PAE_BIT)
7506 || !(sregs->efer & EFER_LMA))
7507 return -EINVAL;
7508 } else {
7509 /*
7510 * Not in 64-bit mode: EFER.LMA is clear and the code
7511 * segment cannot be 64-bit.
7512 */
7513 if (sregs->efer & EFER_LMA || sregs->cs.l)
7514 return -EINVAL;
7515 }
7516
7517 return 0;
7518}
7519
7501int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 7520int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
7502 struct kvm_sregs *sregs) 7521 struct kvm_sregs *sregs)
7503{ 7522{
@@ -7510,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
7510 (sregs->cr4 & X86_CR4_OSXSAVE)) 7529 (sregs->cr4 & X86_CR4_OSXSAVE))
7511 return -EINVAL; 7530 return -EINVAL;
7512 7531
7532 if (kvm_valid_sregs(vcpu, sregs))
7533 return -EINVAL;
7534
7513 apic_base_msr.data = sregs->apic_base; 7535 apic_base_msr.data = sregs->apic_base;
7514 apic_base_msr.host_initiated = true; 7536 apic_base_msr.host_initiated = true;
7515 if (kvm_set_apic_base(vcpu, &apic_base_msr)) 7537 if (kvm_set_apic_base(vcpu, &apic_base_msr))
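The new kvm_valid_sregs() helper rejects KVM_SET_SREGS payloads that violate the x86 long-mode consistency rules before they reach the rest of the ioctl. The rule itself, restated as a standalone check (bit positions follow the architecture manuals; the macro names here are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdint.h>

#define CR0_PG   (1u << 31)	/* paging enabled */
#define CR4_PAE  (1u << 5)	/* physical address extension */
#define EFER_LME (1u << 8)	/* long mode enable */
#define EFER_LMA (1u << 10)	/* long mode active */

static bool sregs_consistent(uint32_t cr0, uint32_t cr4,
			     uint32_t efer, bool cs_l)
{
	if ((efer & EFER_LME) && (cr0 & CR0_PG))
		/* 64-bit mode: PAE and LMA must both be set */
		return (cr4 & CR4_PAE) && (efer & EFER_LMA);

	/* not in 64-bit mode: LMA clear, CS cannot be a 64-bit segment */
	return !(efer & EFER_LMA) && !cs_l;
}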
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 553f8fd23cc4..4846eff7e4c8 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
107 delay = min_t(u64, MWAITX_MAX_LOOPS, loops); 107 delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
108 108
109 /* 109 /*
110 * Use cpu_tss as a cacheline-aligned, seldomly 110 * Use cpu_tss_rw as a cacheline-aligned, seldomly
111 * accessed per-cpu variable as the monitor target. 111 * accessed per-cpu variable as the monitor target.
112 */ 112 */
113 __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); 113 __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
114 114
115 /* 115 /*
116 * AMD, like Intel, supports the EAX hint and EAX=0xf 116 * AMD, like Intel, supports the EAX hint and EAX=0xf
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index febf6980e653..06fe3d51d385 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
860 if (!printk_ratelimit()) 860 if (!printk_ratelimit())
861 return; 861 return;
862 862
863 printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx", 863 printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
864 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 864 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
865 tsk->comm, task_pid_nr(tsk), address, 865 tsk->comm, task_pid_nr(tsk), address,
866 (void *)regs->ip, (void *)regs->sp, error_code); 866 (void *)regs->ip, (void *)regs->sp, error_code);
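The fault.c hunk switches the segfault banner from %p to %px: with printk hashing plain %p output, the raw ip/sp values only appear when %px is requested explicitly, which is the intent for this already ratelimited diagnostic. A kernel-flavoured sketch of the distinction, not a standalone program ('ptr' is an arbitrary pointer variable here):

	/* %p prints a hashed value; %px prints the raw pointer and is meant
	 * only for places where exposing the address is acceptable. */
	pr_info("hashed: %p raw: %px\n", ptr, ptr);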
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 99dfed6dfef8..9ec70d780f1f 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -277,6 +277,7 @@ void __init kasan_early_init(void)
277void __init kasan_init(void) 277void __init kasan_init(void)
278{ 278{
279 int i; 279 int i;
280 void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
280 281
281#ifdef CONFIG_KASAN_INLINE 282#ifdef CONFIG_KASAN_INLINE
282 register_die_notifier(&kasan_die_notifier); 283 register_die_notifier(&kasan_die_notifier);
@@ -329,8 +330,23 @@ void __init kasan_init(void)
329 (unsigned long)kasan_mem_to_shadow(_end), 330 (unsigned long)kasan_mem_to_shadow(_end),
330 early_pfn_to_nid(__pa(_stext))); 331 early_pfn_to_nid(__pa(_stext)));
331 332
333 shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
334 shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
335 shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
336 PAGE_SIZE);
337
338 shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
339 shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
340 shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
341 PAGE_SIZE);
342
332 kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), 343 kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
333 (void *)KASAN_SHADOW_END); 344 shadow_cpu_entry_begin);
345
346 kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
347 (unsigned long)shadow_cpu_entry_end, 0);
348
349 kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
334 350
335 load_cr3(init_top_pgt); 351 load_cr3(init_top_pgt);
336 __flush_tlb_all(); 352 __flush_tlb_all();
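The kasan_init() change stops covering everything above MODULES_END with the zero shadow and instead populates real shadow for the cpu-entry-area fixmap range, computing that range's shadow addresses and rounding them out to page boundaries. The underlying mapping is the usual one shadow byte per 8 bytes of memory; a standalone sketch with an illustrative offset (the kernel's KASAN_SHADOW_OFFSET depends on configuration):

#include <stdint.h>
#include <stdio.h>

#define SHADOW_SCALE_SHIFT 3			/* 8 bytes of memory per shadow byte */
#define SHADOW_OFFSET 0xdffffc0000000000ULL	/* illustrative value */

static uint64_t mem_to_shadow(uint64_t addr)
{
	return (addr >> SHADOW_SCALE_SHIFT) + SHADOW_OFFSET;
}

int main(void)
{
	uint64_t begin = 0xfffffe0000000000ULL;	/* illustrative region start */
	uint64_t page = 4096;

	/* round the shadow start down to a page boundary, as the hunk above does */
	uint64_t shadow = mem_to_shadow(begin) & ~(page - 1);

	printf("shadow for %#llx starts at %#llx\n",
	       (unsigned long long)begin, (unsigned long long)shadow);
	return 0;
}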
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 36a28eddb435..a7d966964c6f 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -152,17 +152,19 @@ static void do_fpu_end(void)
152static void fix_processor_context(void) 152static void fix_processor_context(void)
153{ 153{
154 int cpu = smp_processor_id(); 154 int cpu = smp_processor_id();
155 struct tss_struct *t = &per_cpu(cpu_tss, cpu);
156#ifdef CONFIG_X86_64 155#ifdef CONFIG_X86_64
157 struct desc_struct *desc = get_cpu_gdt_rw(cpu); 156 struct desc_struct *desc = get_cpu_gdt_rw(cpu);
158 tss_desc tss; 157 tss_desc tss;
159#endif 158#endif
160 set_tss_desc(cpu, t); /* 159
161 * This just modifies memory; should not be 160 /*
162 * necessary. But... This is necessary, because 161 * We need to reload TR, which requires that we change the
163 * 386 hardware has concept of busy TSS or some 162 * GDT entry to indicate "available" first.
164 * similar stupidity. 163 *
165 */ 164 * XXX: This could probably all be replaced by a call to
165 * force_reload_TR().
166 */
167 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
166 168
167#ifdef CONFIG_X86_64 169#ifdef CONFIG_X86_64
168 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); 170 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index f2414c6c5e7c..7beeee1443b3 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -826,7 +826,7 @@ static void xen_load_sp0(unsigned long sp0)
826 mcs = xen_mc_entry(0); 826 mcs = xen_mc_entry(0);
827 MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); 827 MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
828 xen_mc_issue(PARAVIRT_LAZY_CPU); 828 xen_mc_issue(PARAVIRT_LAZY_CPU);
829 this_cpu_write(cpu_tss.x86_tss.sp0, sp0); 829 this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
830} 830}
831 831
832void xen_set_iopl_mask(unsigned mask) 832void xen_set_iopl_mask(unsigned mask)
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index fc048ec686e7..6cf801ca1142 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2272,7 +2272,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2272#endif 2272#endif
2273 case FIX_TEXT_POKE0: 2273 case FIX_TEXT_POKE0:
2274 case FIX_TEXT_POKE1: 2274 case FIX_TEXT_POKE1:
2275 case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END: 2275 case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
2276 /* All local page mappings */ 2276 /* All local page mappings */
2277 pte = pfn_pte(phys, prot); 2277 pte = pfn_pte(phys, prot);
2278 break; 2278 break;
diff --git a/block/bio.c b/block/bio.c
index 8bfdea58159b..9ef6cf3addb3 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
599 bio->bi_disk = bio_src->bi_disk; 599 bio->bi_disk = bio_src->bi_disk;
600 bio->bi_partno = bio_src->bi_partno; 600 bio->bi_partno = bio_src->bi_partno;
601 bio_set_flag(bio, BIO_CLONED); 601 bio_set_flag(bio, BIO_CLONED);
602 if (bio_flagged(bio_src, BIO_THROTTLED))
603 bio_set_flag(bio, BIO_THROTTLED);
602 bio->bi_opf = bio_src->bi_opf; 604 bio->bi_opf = bio_src->bi_opf;
603 bio->bi_write_hint = bio_src->bi_write_hint; 605 bio->bi_write_hint = bio_src->bi_write_hint;
604 bio->bi_iter = bio_src->bi_iter; 606 bio->bi_iter = bio_src->bi_iter;
diff --git a/block/blk-map.c b/block/blk-map.c
index b21f8e86f120..d3a94719f03f 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -12,22 +12,29 @@
12#include "blk.h" 12#include "blk.h"
13 13
14/* 14/*
15 * Append a bio to a passthrough request. Only works can be merged into 15 * Append a bio to a passthrough request. Only works if the bio can be merged
16 * the request based on the driver constraints. 16 * into the request based on the driver constraints.
17 */ 17 */
18int blk_rq_append_bio(struct request *rq, struct bio *bio) 18int blk_rq_append_bio(struct request *rq, struct bio **bio)
19{ 19{
20 blk_queue_bounce(rq->q, &bio); 20 struct bio *orig_bio = *bio;
21
22 blk_queue_bounce(rq->q, bio);
21 23
22 if (!rq->bio) { 24 if (!rq->bio) {
23 blk_rq_bio_prep(rq->q, rq, bio); 25 blk_rq_bio_prep(rq->q, rq, *bio);
24 } else { 26 } else {
25 if (!ll_back_merge_fn(rq->q, rq, bio)) 27 if (!ll_back_merge_fn(rq->q, rq, *bio)) {
28 if (orig_bio != *bio) {
29 bio_put(*bio);
30 *bio = orig_bio;
31 }
26 return -EINVAL; 32 return -EINVAL;
33 }
27 34
28 rq->biotail->bi_next = bio; 35 rq->biotail->bi_next = *bio;
29 rq->biotail = bio; 36 rq->biotail = *bio;
30 rq->__data_len += bio->bi_iter.bi_size; 37 rq->__data_len += (*bio)->bi_iter.bi_size;
31 } 38 }
32 39
33 return 0; 40 return 0;
@@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
73 * We link the bounce buffer in and could have to traverse it 80 * We link the bounce buffer in and could have to traverse it
74 * later so we have to get a ref to prevent it from being freed 81 * later so we have to get a ref to prevent it from being freed
75 */ 82 */
76 ret = blk_rq_append_bio(rq, bio); 83 ret = blk_rq_append_bio(rq, &bio);
77 bio_get(bio);
78 if (ret) { 84 if (ret) {
79 bio_endio(bio);
80 __blk_rq_unmap_user(orig_bio); 85 __blk_rq_unmap_user(orig_bio);
81 bio_put(bio);
82 return ret; 86 return ret;
83 } 87 }
88 bio_get(bio);
84 89
85 return 0; 90 return 0;
86} 91}
@@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
213 int reading = rq_data_dir(rq) == READ; 218 int reading = rq_data_dir(rq) == READ;
214 unsigned long addr = (unsigned long) kbuf; 219 unsigned long addr = (unsigned long) kbuf;
215 int do_copy = 0; 220 int do_copy = 0;
216 struct bio *bio; 221 struct bio *bio, *orig_bio;
217 int ret; 222 int ret;
218 223
219 if (len > (queue_max_hw_sectors(q) << 9)) 224 if (len > (queue_max_hw_sectors(q) << 9))
@@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
236 if (do_copy) 241 if (do_copy)
237 rq->rq_flags |= RQF_COPY_USER; 242 rq->rq_flags |= RQF_COPY_USER;
238 243
239 ret = blk_rq_append_bio(rq, bio); 244 orig_bio = bio;
245 ret = blk_rq_append_bio(rq, &bio);
240 if (unlikely(ret)) { 246 if (unlikely(ret)) {
241 /* request is too big */ 247 /* request is too big */
242 bio_put(bio); 248 bio_put(orig_bio);
243 return ret; 249 return ret;
244 } 250 }
245 251
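blk_rq_append_bio() now takes a struct bio ** because blk_queue_bounce() may substitute a bounce clone for the caller's bio; the caller has to see that substitution, and on failure the helper drops any clone it created and restores the original. The resulting calling convention, reduced to the shape used by the blk_rq_map_kern() hunk above:

	struct bio *orig_bio = bio;

	ret = blk_rq_append_bio(rq, &bio);	/* may rewrite 'bio' to a bounce clone */
	if (unlikely(ret)) {
		bio_put(orig_bio);		/* the helper already cleaned up any clone */
		return ret;
	}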
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 825bc29767e6..d19f416d6101 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2226,13 +2226,7 @@ again:
2226out_unlock: 2226out_unlock:
2227 spin_unlock_irq(q->queue_lock); 2227 spin_unlock_irq(q->queue_lock);
2228out: 2228out:
2229 /* 2229 bio_set_flag(bio, BIO_THROTTLED);
2230 * As multiple blk-throtls may stack in the same issue path, we
2231 * don't want bios to leave with the flag set. Clear the flag if
2232 * being issued.
2233 */
2234 if (!throttled)
2235 bio_clear_flag(bio, BIO_THROTTLED);
2236 2230
2237#ifdef CONFIG_BLK_DEV_THROTTLING_LOW 2231#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
2238 if (throttled || !td->track_bio_latency) 2232 if (throttled || !td->track_bio_latency)
diff --git a/block/bounce.c b/block/bounce.c
index fceb1a96480b..1d05c422c932 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
200 unsigned i = 0; 200 unsigned i = 0;
201 bool bounce = false; 201 bool bounce = false;
202 int sectors = 0; 202 int sectors = 0;
203 bool passthrough = bio_is_passthrough(*bio_orig);
203 204
204 bio_for_each_segment(from, *bio_orig, iter) { 205 bio_for_each_segment(from, *bio_orig, iter) {
205 if (i++ < BIO_MAX_PAGES) 206 if (i++ < BIO_MAX_PAGES)
@@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
210 if (!bounce) 211 if (!bounce)
211 return; 212 return;
212 213
213 if (sectors < bio_sectors(*bio_orig)) { 214 if (!passthrough && sectors < bio_sectors(*bio_orig)) {
214 bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split); 215 bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
215 bio_chain(bio, *bio_orig); 216 bio_chain(bio, *bio_orig);
216 generic_make_request(*bio_orig); 217 generic_make_request(*bio_orig);
217 *bio_orig = bio; 218 *bio_orig = bio;
218 } 219 }
219 bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set); 220 bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
221 bounce_bio_set);
220 222
221 bio_for_each_segment_all(to, bio, i) { 223 bio_for_each_segment_all(to, bio, i) {
222 struct page *page = to->bv_page; 224 struct page *page = to->bv_page;
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b4df317c2916..f95c60774ce8 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -100,9 +100,13 @@ struct kyber_hctx_data {
100 unsigned int cur_domain; 100 unsigned int cur_domain;
101 unsigned int batching; 101 unsigned int batching;
102 wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS]; 102 wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
103 struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
103 atomic_t wait_index[KYBER_NUM_DOMAINS]; 104 atomic_t wait_index[KYBER_NUM_DOMAINS];
104}; 105};
105 106
107static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
108 void *key);
109
106static int rq_sched_domain(const struct request *rq) 110static int rq_sched_domain(const struct request *rq)
107{ 111{
108 unsigned int op = rq->cmd_flags; 112 unsigned int op = rq->cmd_flags;
@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
385 389
386 for (i = 0; i < KYBER_NUM_DOMAINS; i++) { 390 for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
387 INIT_LIST_HEAD(&khd->rqs[i]); 391 INIT_LIST_HEAD(&khd->rqs[i]);
392 init_waitqueue_func_entry(&khd->domain_wait[i],
393 kyber_domain_wake);
394 khd->domain_wait[i].private = hctx;
388 INIT_LIST_HEAD(&khd->domain_wait[i].entry); 395 INIT_LIST_HEAD(&khd->domain_wait[i].entry);
389 atomic_set(&khd->wait_index[i], 0); 396 atomic_set(&khd->wait_index[i], 0);
390 } 397 }
@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
524 int nr; 531 int nr;
525 532
526 nr = __sbitmap_queue_get(domain_tokens); 533 nr = __sbitmap_queue_get(domain_tokens);
527 if (nr >= 0)
528 return nr;
529 534
530 /* 535 /*
531 * If we failed to get a domain token, make sure the hardware queue is 536 * If we failed to get a domain token, make sure the hardware queue is
532 * run when one becomes available. Note that this is serialized on 537 * run when one becomes available. Note that this is serialized on
533 * khd->lock, but we still need to be careful about the waker. 538 * khd->lock, but we still need to be careful about the waker.
534 */ 539 */
535 if (list_empty_careful(&wait->entry)) { 540 if (nr < 0 && list_empty_careful(&wait->entry)) {
536 init_waitqueue_func_entry(wait, kyber_domain_wake);
537 wait->private = hctx;
538 ws = sbq_wait_ptr(domain_tokens, 541 ws = sbq_wait_ptr(domain_tokens,
539 &khd->wait_index[sched_domain]); 542 &khd->wait_index[sched_domain]);
543 khd->domain_ws[sched_domain] = ws;
540 add_wait_queue(&ws->wait, wait); 544 add_wait_queue(&ws->wait, wait);
541 545
542 /* 546 /*
543 * Try again in case a token was freed before we got on the wait 547 * Try again in case a token was freed before we got on the wait
544 * queue. The waker may have already removed the entry from the 548 * queue.
545 * wait queue, but list_del_init() is okay with that.
546 */ 549 */
547 nr = __sbitmap_queue_get(domain_tokens); 550 nr = __sbitmap_queue_get(domain_tokens);
548 if (nr >= 0) { 551 }
549 unsigned long flags;
550 552
551 spin_lock_irqsave(&ws->wait.lock, flags); 553 /*
552 list_del_init(&wait->entry); 554 * If we got a token while we were on the wait queue, remove ourselves
553 spin_unlock_irqrestore(&ws->wait.lock, flags); 555 * from the wait queue to ensure that all wake ups make forward
554 } 556 * progress. It's possible that the waker already deleted the entry
557 * between the !list_empty_careful() check and us grabbing the lock, but
558 * list_del_init() is okay with that.
559 */
560 if (nr >= 0 && !list_empty_careful(&wait->entry)) {
561 ws = khd->domain_ws[sched_domain];
562 spin_lock_irq(&ws->wait.lock);
563 list_del_init(&wait->entry);
564 spin_unlock_irq(&ws->wait.lock);
555 } 565 }
566
556 return nr; 567 return nr;
557} 568}
558 569
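Reassembled without the diff markers, the token path in kyber_get_domain_token() now reads roughly as follows: try for a token, queue on the sbitmap waitqueue only if that failed, retry once, and if the retry succeeded while queued, lock the remembered waitqueue and dequeue so later wakeups keep making forward progress:

	nr = __sbitmap_queue_get(domain_tokens);

	if (nr < 0 && list_empty_careful(&wait->entry)) {
		ws = sbq_wait_ptr(domain_tokens, &khd->wait_index[sched_domain]);
		khd->domain_ws[sched_domain] = ws;	/* remember where we queued */
		add_wait_queue(&ws->wait, wait);

		nr = __sbitmap_queue_get(domain_tokens);	/* a token may have been freed meanwhile */
	}

	if (nr >= 0 && !list_empty_careful(&wait->entry)) {
		ws = khd->domain_ws[sched_domain];
		spin_lock_irq(&ws->wait.lock);
		list_del_init(&wait->entry);	/* fine even if the waker already removed us */
		spin_unlock_irq(&ws->wait.lock);
	}

	return nr;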
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 6742f6c68034..9bff853e85f3 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -1007,7 +1007,7 @@ skip:
1007 /* The record may be cleared by others, try read next record */ 1007 /* The record may be cleared by others, try read next record */
1008 if (len == -ENOENT) 1008 if (len == -ENOENT)
1009 goto skip; 1009 goto skip;
1010 else if (len < sizeof(*rcd)) { 1010 else if (len < 0 || len < sizeof(*rcd)) {
1011 rc = -EIO; 1011 rc = -EIO;
1012 goto out; 1012 goto out;
1013 } 1013 }
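The erst.c check adds len < 0 because len is a signed ssize_t while sizeof(*rcd) is a size_t: in the comparison the negative value is converted to an enormous unsigned number, so an error return from the lower layer would sail past the old len < sizeof(*rcd) test. A small standalone demonstration of the conversion:

#include <stdio.h>

int main(void)
{
	long len = -5;			/* stand-in for an error code */
	unsigned long need = 64;	/* stand-in for sizeof(*rcd) */

	if ((unsigned long)len < need)	/* what 'len < sizeof(*rcd)' really compares */
		puts("not reached: -5 became a huge unsigned value");
	else
		puts("unsigned comparison misses the error, hence the extra len < 0 test");

	return 0;
}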
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 30e84cc600ae..06ea4749ebd9 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1171,7 +1171,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
1171 struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); 1171 struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
1172 struct cpc_register_resource *desired_reg; 1172 struct cpc_register_resource *desired_reg;
1173 int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); 1173 int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
1174 struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id]; 1174 struct cppc_pcc_data *pcc_ss_data;
1175 int ret = 0; 1175 int ret = 0;
1176 1176
1177 if (!cpc_desc || pcc_ss_id < 0) { 1177 if (!cpc_desc || pcc_ss_id < 0) {
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index ccb9975a97fa..ad0477ae820f 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps)
35struct nullb_cmd { 35struct nullb_cmd {
36 struct list_head list; 36 struct list_head list;
37 struct llist_node ll_list; 37 struct llist_node ll_list;
38 call_single_data_t csd; 38 struct __call_single_data csd;
39 struct request *rq; 39 struct request *rq;
40 struct bio *bio; 40 struct bio *bio;
41 unsigned int tag; 41 unsigned int tag;
42 blk_status_t error;
42 struct nullb_queue *nq; 43 struct nullb_queue *nq;
43 struct hrtimer timer; 44 struct hrtimer timer;
44 blk_status_t error;
45}; 45};
46 46
47struct nullb_queue { 47struct nullb_queue {
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 58d4f4e1ad6a..ca38229b045a 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -22,6 +22,8 @@
22 22
23#include "cpufreq_governor.h" 23#include "cpufreq_governor.h"
24 24
25#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL (2 * TICK_NSEC / NSEC_PER_USEC)
26
25static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); 27static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs);
26 28
27static DEFINE_MUTEX(gov_dbs_data_mutex); 29static DEFINE_MUTEX(gov_dbs_data_mutex);
@@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
47{ 49{
48 struct dbs_data *dbs_data = to_dbs_data(attr_set); 50 struct dbs_data *dbs_data = to_dbs_data(attr_set);
49 struct policy_dbs_info *policy_dbs; 51 struct policy_dbs_info *policy_dbs;
52 unsigned int sampling_interval;
50 int ret; 53 int ret;
51 ret = sscanf(buf, "%u", &dbs_data->sampling_rate); 54
52 if (ret != 1) 55 ret = sscanf(buf, "%u", &sampling_interval);
56 if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL)
53 return -EINVAL; 57 return -EINVAL;
54 58
59 dbs_data->sampling_rate = sampling_interval;
60
55 /* 61 /*
56 * We are operating under dbs_data->mutex and so the list and its 62 * We are operating under dbs_data->mutex and so the list and its
57 * entries can't be freed concurrently. 63 * entries can't be freed concurrently.
@@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
430 if (ret) 436 if (ret)
431 goto free_policy_dbs_info; 437 goto free_policy_dbs_info;
432 438
433 dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy); 439 /*
440 * The sampling interval should not be less than the transition latency
441 * of the CPU and it also cannot be too small for dbs_update() to work
442 * correctly.
443 */
444 dbs_data->sampling_rate = max_t(unsigned int,
445 CPUFREQ_DBS_MIN_SAMPLING_INTERVAL,
446 cpufreq_policy_transition_delay_us(policy));
434 447
435 if (!have_governor_per_policy()) 448 if (!have_governor_per_policy())
436 gov->gdbs_data = dbs_data; 449 gov->gdbs_data = dbs_data;
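Both paths in cpufreq_governor.c now enforce the same floor: the sysfs store rejects sampling intervals below two ticks, and the init path clamps the transition-delay-derived default up to that floor, since per the new comment dbs_update() needs a long enough window to work correctly. The clamp on its own, with an illustrative tick length:

#include <stdio.h>

#define TICK_US			4000u		/* illustrative: 250 Hz tick */
#define DBS_MIN_SAMPLING_US	(2u * TICK_US)

static unsigned int pick_sampling_rate(unsigned int transition_delay_us)
{
	return transition_delay_us > DBS_MIN_SAMPLING_US ?
	       transition_delay_us : DBS_MIN_SAMPLING_US;
}

int main(void)
{
	printf("%u -> %u\n", 1000u, pick_sampling_rate(1000u));	/* raised to the floor */
	printf("%u -> %u\n", 20000u, pick_sampling_rate(20000u));	/* left alone */
	return 0;
}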
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 628fe899cb48..d9b2c2de49c4 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev)
226 val >>= OCOTP_CFG3_SPEED_SHIFT; 226 val >>= OCOTP_CFG3_SPEED_SHIFT;
227 val &= 0x3; 227 val &= 0x3;
228 228
229 if ((val != OCOTP_CFG3_SPEED_1P2GHZ) &&
230 of_machine_is_compatible("fsl,imx6q"))
231 if (dev_pm_opp_disable(dev, 1200000000))
232 dev_warn(dev, "failed to disable 1.2GHz OPP\n");
233 if (val < OCOTP_CFG3_SPEED_996MHZ) 229 if (val < OCOTP_CFG3_SPEED_996MHZ)
234 if (dev_pm_opp_disable(dev, 996000000)) 230 if (dev_pm_opp_disable(dev, 996000000))
235 dev_warn(dev, "failed to disable 996MHz OPP\n"); 231 dev_warn(dev, "failed to disable 996MHz OPP\n");
236 if (of_machine_is_compatible("fsl,imx6q")) { 232
233 if (of_machine_is_compatible("fsl,imx6q") ||
234 of_machine_is_compatible("fsl,imx6qp")) {
237 if (val != OCOTP_CFG3_SPEED_852MHZ) 235 if (val != OCOTP_CFG3_SPEED_852MHZ)
238 if (dev_pm_opp_disable(dev, 852000000)) 236 if (dev_pm_opp_disable(dev, 852000000))
239 dev_warn(dev, "failed to disable 852MHz OPP\n"); 237 dev_warn(dev, "failed to disable 852MHz OPP\n");
238 if (val != OCOTP_CFG3_SPEED_1P2GHZ)
239 if (dev_pm_opp_disable(dev, 1200000000))
240 dev_warn(dev, "failed to disable 1.2GHz OPP\n");
240 } 241 }
241 iounmap(base); 242 iounmap(base);
242put_node: 243put_node:
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index fbab271b3bf9..a861b5b4d443 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -708,7 +708,7 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
708 unsigned long flags) 708 unsigned long flags)
709{ 709{
710 struct at_dma_chan *atchan = to_at_dma_chan(chan); 710 struct at_dma_chan *atchan = to_at_dma_chan(chan);
711 struct data_chunk *first = xt->sgl; 711 struct data_chunk *first;
712 struct at_desc *desc = NULL; 712 struct at_desc *desc = NULL;
713 size_t xfer_count; 713 size_t xfer_count;
714 unsigned int dwidth; 714 unsigned int dwidth;
@@ -720,6 +720,8 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
720 if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) 720 if (unlikely(!xt || xt->numf != 1 || !xt->frame_size))
721 return NULL; 721 return NULL;
722 722
723 first = xt->sgl;
724
723 dev_info(chan2dev(chan), 725 dev_info(chan2dev(chan),
724 "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n", 726 "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n",
725 __func__, &xt->src_start, &xt->dst_start, xt->numf, 727 __func__, &xt->src_start, &xt->dst_start, xt->numf,
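The at_hdmac fix is the classic "dereference before validate" pattern: the old code read xt->sgl in the declaration, ahead of the unlikely(!xt ...) check a few lines later. Moving the read below the check is all that is needed; a compact sketch with a hypothetical structure in place of dma_interleaved_template:

struct xfer { int *sgl; };	/* hypothetical stand-in */

static int prep(struct xfer *xt)
{
	int *first;

	if (!xt)		/* validate first... */
		return -1;

	first = xt->sgl;	/* ...then touch the pointer */
	return first != NULL;
}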
diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c
index d50273fed715..afd5e10f8927 100644
--- a/drivers/dma/dma-jz4740.c
+++ b/drivers/dma/dma-jz4740.c
@@ -555,7 +555,7 @@ static int jz4740_dma_probe(struct platform_device *pdev)
555 555
556 ret = dma_async_device_register(dd); 556 ret = dma_async_device_register(dd);
557 if (ret) 557 if (ret)
558 return ret; 558 goto err_clk;
559 559
560 irq = platform_get_irq(pdev, 0); 560 irq = platform_get_irq(pdev, 0);
561 ret = request_irq(irq, jz4740_dma_irq, 0, dev_name(&pdev->dev), dmadev); 561 ret = request_irq(irq, jz4740_dma_irq, 0, dev_name(&pdev->dev), dmadev);
@@ -568,6 +568,8 @@ static int jz4740_dma_probe(struct platform_device *pdev)
568 568
569err_unregister: 569err_unregister:
570 dma_async_device_unregister(dd); 570 dma_async_device_unregister(dd);
571err_clk:
572 clk_disable_unprepare(dmadev->clk);
571 return ret; 573 return ret;
572} 574}
573 575
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 47edc7fbf91f..ec5f9d2bc820 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -155,6 +155,12 @@ MODULE_PARM_DESC(run, "Run the test (default: false)");
155#define PATTERN_COUNT_MASK 0x1f 155#define PATTERN_COUNT_MASK 0x1f
156#define PATTERN_MEMSET_IDX 0x01 156#define PATTERN_MEMSET_IDX 0x01
157 157
158/* poor man's completion - we want to use wait_event_freezable() on it */
159struct dmatest_done {
160 bool done;
161 wait_queue_head_t *wait;
162};
163
158struct dmatest_thread { 164struct dmatest_thread {
159 struct list_head node; 165 struct list_head node;
160 struct dmatest_info *info; 166 struct dmatest_info *info;
@@ -165,6 +171,8 @@ struct dmatest_thread {
165 u8 **dsts; 171 u8 **dsts;
166 u8 **udsts; 172 u8 **udsts;
167 enum dma_transaction_type type; 173 enum dma_transaction_type type;
174 wait_queue_head_t done_wait;
175 struct dmatest_done test_done;
168 bool done; 176 bool done;
169}; 177};
170 178
@@ -342,18 +350,25 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
342 return error_count; 350 return error_count;
343} 351}
344 352
345/* poor man's completion - we want to use wait_event_freezable() on it */
346struct dmatest_done {
347 bool done;
348 wait_queue_head_t *wait;
349};
350 353
351static void dmatest_callback(void *arg) 354static void dmatest_callback(void *arg)
352{ 355{
353 struct dmatest_done *done = arg; 356 struct dmatest_done *done = arg;
354 357 struct dmatest_thread *thread =
355 done->done = true; 358 container_of(arg, struct dmatest_thread, done_wait);
356 wake_up_all(done->wait); 359 if (!thread->done) {
360 done->done = true;
361 wake_up_all(done->wait);
362 } else {
363 /*
364 * If thread->done, it means that this callback occurred
365 * after the parent thread has cleaned up. This can
366 * happen in the case that driver doesn't implement
367 * the terminate_all() functionality and a dma operation
368 * did not occur within the timeout period
369 */
370 WARN(1, "dmatest: Kernel memory may be corrupted!!\n");
371 }
357} 372}
358 373
359static unsigned int min_odd(unsigned int x, unsigned int y) 374static unsigned int min_odd(unsigned int x, unsigned int y)
@@ -424,9 +439,8 @@ static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
424 */ 439 */
425static int dmatest_func(void *data) 440static int dmatest_func(void *data)
426{ 441{
427 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait);
428 struct dmatest_thread *thread = data; 442 struct dmatest_thread *thread = data;
429 struct dmatest_done done = { .wait = &done_wait }; 443 struct dmatest_done *done = &thread->test_done;
430 struct dmatest_info *info; 444 struct dmatest_info *info;
431 struct dmatest_params *params; 445 struct dmatest_params *params;
432 struct dma_chan *chan; 446 struct dma_chan *chan;
@@ -673,9 +687,9 @@ static int dmatest_func(void *data)
673 continue; 687 continue;
674 } 688 }
675 689
676 done.done = false; 690 done->done = false;
677 tx->callback = dmatest_callback; 691 tx->callback = dmatest_callback;
678 tx->callback_param = &done; 692 tx->callback_param = done;
679 cookie = tx->tx_submit(tx); 693 cookie = tx->tx_submit(tx);
680 694
681 if (dma_submit_error(cookie)) { 695 if (dma_submit_error(cookie)) {
@@ -688,21 +702,12 @@ static int dmatest_func(void *data)
688 } 702 }
689 dma_async_issue_pending(chan); 703 dma_async_issue_pending(chan);
690 704
691 wait_event_freezable_timeout(done_wait, done.done, 705 wait_event_freezable_timeout(thread->done_wait, done->done,
692 msecs_to_jiffies(params->timeout)); 706 msecs_to_jiffies(params->timeout));
693 707
694 status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); 708 status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
695 709
696 if (!done.done) { 710 if (!done->done) {
697 /*
698 * We're leaving the timed out dma operation with
699 * dangling pointer to done_wait. To make this
700 * correct, we'll need to allocate wait_done for
701 * each test iteration and perform "who's gonna
702 * free it this time?" dancing. For now, just
703 * leave it dangling.
704 */
705 WARN(1, "dmatest: Kernel stack may be corrupted!!\n");
706 dmaengine_unmap_put(um); 711 dmaengine_unmap_put(um);
707 result("test timed out", total_tests, src_off, dst_off, 712 result("test timed out", total_tests, src_off, dst_off,
708 len, 0); 713 len, 0);
@@ -789,7 +794,7 @@ err_thread_type:
789 dmatest_KBs(runtime, total_len), ret); 794 dmatest_KBs(runtime, total_len), ret);
790 795
791 /* terminate all transfers on specified channels */ 796 /* terminate all transfers on specified channels */
792 if (ret) 797 if (ret || failed_tests)
793 dmaengine_terminate_all(chan); 798 dmaengine_terminate_all(chan);
794 799
795 thread->done = true; 800 thread->done = true;
@@ -849,6 +854,8 @@ static int dmatest_add_threads(struct dmatest_info *info,
849 thread->info = info; 854 thread->info = info;
850 thread->chan = dtc->chan; 855 thread->chan = dtc->chan;
851 thread->type = type; 856 thread->type = type;
857 thread->test_done.wait = &thread->done_wait;
858 init_waitqueue_head(&thread->done_wait);
852 smp_wmb(); 859 smp_wmb();
853 thread->task = kthread_create(dmatest_func, thread, "%s-%s%u", 860 thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
854 dma_chan_name(chan), op, i); 861 dma_chan_name(chan), op, i);
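The dmatest rework moves the done/wait pair out of dmatest_func()'s stack frame and into struct dmatest_thread, which lives as long as the kthread: a DMA callback arriving after a timeout now lands on memory that is still valid and merely WARNs, instead of scribbling on whatever has reused the stack slot. The ownership idea in miniature (names are illustrative, not the kernel's):

#include <stdbool.h>

struct test_done {
	bool done;			/* completion flag the callback may set late */
};

struct test_thread {
	struct test_done test_done;	/* same lifetime as the thread object */
	bool done;			/* set once the thread has torn down */
};

static void dma_done_callback(void *arg)
{
	struct test_done *done = arg;	/* points into test_thread, never the stack */

	done->done = true;
}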
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c
index 6775f2c74e25..c7568869284e 100644
--- a/drivers/dma/fsl-edma.c
+++ b/drivers/dma/fsl-edma.c
@@ -863,11 +863,11 @@ static void fsl_edma_irq_exit(
863 } 863 }
864} 864}
865 865
866static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma) 866static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma, int nr_clocks)
867{ 867{
868 int i; 868 int i;
869 869
870 for (i = 0; i < DMAMUX_NR; i++) 870 for (i = 0; i < nr_clocks; i++)
871 clk_disable_unprepare(fsl_edma->muxclk[i]); 871 clk_disable_unprepare(fsl_edma->muxclk[i]);
872} 872}
873 873
@@ -904,25 +904,25 @@ static int fsl_edma_probe(struct platform_device *pdev)
904 904
905 res = platform_get_resource(pdev, IORESOURCE_MEM, 1 + i); 905 res = platform_get_resource(pdev, IORESOURCE_MEM, 1 + i);
906 fsl_edma->muxbase[i] = devm_ioremap_resource(&pdev->dev, res); 906 fsl_edma->muxbase[i] = devm_ioremap_resource(&pdev->dev, res);
907 if (IS_ERR(fsl_edma->muxbase[i])) 907 if (IS_ERR(fsl_edma->muxbase[i])) {
908 /* on error: disable all previously enabled clks */
909 fsl_disable_clocks(fsl_edma, i);
908 return PTR_ERR(fsl_edma->muxbase[i]); 910 return PTR_ERR(fsl_edma->muxbase[i]);
911 }
909 912
910 sprintf(clkname, "dmamux%d", i); 913 sprintf(clkname, "dmamux%d", i);
911 fsl_edma->muxclk[i] = devm_clk_get(&pdev->dev, clkname); 914 fsl_edma->muxclk[i] = devm_clk_get(&pdev->dev, clkname);
912 if (IS_ERR(fsl_edma->muxclk[i])) { 915 if (IS_ERR(fsl_edma->muxclk[i])) {
913 dev_err(&pdev->dev, "Missing DMAMUX block clock.\n"); 916 dev_err(&pdev->dev, "Missing DMAMUX block clock.\n");
917 /* on error: disable all previously enabled clks */
918 fsl_disable_clocks(fsl_edma, i);
914 return PTR_ERR(fsl_edma->muxclk[i]); 919 return PTR_ERR(fsl_edma->muxclk[i]);
915 } 920 }
916 921
917 ret = clk_prepare_enable(fsl_edma->muxclk[i]); 922 ret = clk_prepare_enable(fsl_edma->muxclk[i]);
918 if (ret) { 923 if (ret)
919 /* disable only clks which were enabled on error */ 924 /* on error: disable all previously enabled clks */
920 for (; i >= 0; i--) 925 fsl_disable_clocks(fsl_edma, i);
921 clk_disable_unprepare(fsl_edma->muxclk[i]);
922
923 dev_err(&pdev->dev, "DMAMUX clk block failed.\n");
924 return ret;
925 }
926 926
927 } 927 }
928 928
@@ -976,7 +976,7 @@ static int fsl_edma_probe(struct platform_device *pdev)
976 if (ret) { 976 if (ret) {
977 dev_err(&pdev->dev, 977 dev_err(&pdev->dev,
978 "Can't register Freescale eDMA engine. (%d)\n", ret); 978 "Can't register Freescale eDMA engine. (%d)\n", ret);
979 fsl_disable_clocks(fsl_edma); 979 fsl_disable_clocks(fsl_edma, DMAMUX_NR);
980 return ret; 980 return ret;
981 } 981 }
982 982
@@ -985,7 +985,7 @@ static int fsl_edma_probe(struct platform_device *pdev)
985 dev_err(&pdev->dev, 985 dev_err(&pdev->dev,
986 "Can't register Freescale eDMA of_dma. (%d)\n", ret); 986 "Can't register Freescale eDMA of_dma. (%d)\n", ret);
987 dma_async_device_unregister(&fsl_edma->dma_dev); 987 dma_async_device_unregister(&fsl_edma->dma_dev);
988 fsl_disable_clocks(fsl_edma); 988 fsl_disable_clocks(fsl_edma, DMAMUX_NR);
989 return ret; 989 return ret;
990 } 990 }
991 991
@@ -1015,7 +1015,7 @@ static int fsl_edma_remove(struct platform_device *pdev)
1015 fsl_edma_cleanup_vchan(&fsl_edma->dma_dev); 1015 fsl_edma_cleanup_vchan(&fsl_edma->dma_dev);
1016 of_dma_controller_free(np); 1016 of_dma_controller_free(np);
1017 dma_async_device_unregister(&fsl_edma->dma_dev); 1017 dma_async_device_unregister(&fsl_edma->dma_dev);
1018 fsl_disable_clocks(fsl_edma); 1018 fsl_disable_clocks(fsl_edma, DMAMUX_NR);
1019 1019
1020 return 0; 1020 return 0;
1021} 1021}
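fsl_disable_clocks() gaining an nr_clocks argument lets every error path in fsl_edma_probe() unwind exactly the mux clocks that were successfully enabled, while the later failure and remove paths still pass DMAMUX_NR to drop them all. The unwind pattern on its own, with stubbed clock helpers:

#include <stdio.h>

#define NR_CLOCKS 2

static int enable_clock(int i)
{
	printf("enable %d\n", i);
	return i == 1 ? -1 : 0;		/* pretend the second clock fails */
}

static void disable_clock(int i)
{
	printf("disable %d\n", i);
}

static void disable_clocks(int nr)	/* only the first nr are known-enabled */
{
	int i;

	for (i = 0; i < nr; i++)
		disable_clock(i);
}

int main(void)
{
	int i;

	for (i = 0; i < NR_CLOCKS; i++) {
		if (enable_clock(i)) {
			disable_clocks(i);	/* unwind clocks 0..i-1 */
			return 1;
		}
	}

	disable_clocks(NR_CLOCKS);		/* normal teardown */
	return 0;
}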
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 2f31d3d0caa6..7792a9186f9c 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -390,7 +390,7 @@ static int ioat_dma_self_test(struct ioatdma_device *ioat_dma)
390 if (memcmp(src, dest, IOAT_TEST_SIZE)) { 390 if (memcmp(src, dest, IOAT_TEST_SIZE)) {
391 dev_err(dev, "Self-test copy failed compare, disabling\n"); 391 dev_err(dev, "Self-test copy failed compare, disabling\n");
392 err = -ENODEV; 392 err = -ENODEV;
393 goto free_resources; 393 goto unmap_dma;
394 } 394 }
395 395
396unmap_dma: 396unmap_dma:
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
index c9714072e224..59c82cdcf48d 100644
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -377,6 +377,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
377 u8 *ptr; 377 u8 *ptr;
378 u8 *rx_buf; 378 u8 *rx_buf;
379 u8 sum; 379 u8 sum;
380 u8 rx_byte;
380 int ret = 0, final_ret; 381 int ret = 0, final_ret;
381 382
382 len = cros_ec_prepare_tx(ec_dev, ec_msg); 383 len = cros_ec_prepare_tx(ec_dev, ec_msg);
@@ -421,25 +422,22 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
421 if (!ret) { 422 if (!ret) {
422 /* Verify that EC can process command */ 423 /* Verify that EC can process command */
423 for (i = 0; i < len; i++) { 424 for (i = 0; i < len; i++) {
424 switch (rx_buf[i]) { 425 rx_byte = rx_buf[i];
425 case EC_SPI_PAST_END: 426 if (rx_byte == EC_SPI_PAST_END ||
426 case EC_SPI_RX_BAD_DATA: 427 rx_byte == EC_SPI_RX_BAD_DATA ||
427 case EC_SPI_NOT_READY: 428 rx_byte == EC_SPI_NOT_READY) {
428 ret = -EAGAIN; 429 ret = -EREMOTEIO;
429 ec_msg->result = EC_RES_IN_PROGRESS;
430 default:
431 break; 430 break;
432 } 431 }
433 if (ret)
434 break;
435 } 432 }
436 if (!ret)
437 ret = cros_ec_spi_receive_packet(ec_dev,
438 ec_msg->insize + sizeof(*response));
439 } else {
440 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
441 } 433 }
442 434
435 if (!ret)
436 ret = cros_ec_spi_receive_packet(ec_dev,
437 ec_msg->insize + sizeof(*response));
438 else
439 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
440
443 final_ret = terminate_request(ec_dev); 441 final_ret = terminate_request(ec_dev);
444 442
445 spi_bus_unlock(ec_spi->spi->master); 443 spi_bus_unlock(ec_spi->spi->master);
@@ -508,6 +506,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
508 int i, len; 506 int i, len;
509 u8 *ptr; 507 u8 *ptr;
510 u8 *rx_buf; 508 u8 *rx_buf;
509 u8 rx_byte;
511 int sum; 510 int sum;
512 int ret = 0, final_ret; 511 int ret = 0, final_ret;
513 512
@@ -544,25 +543,22 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
544 if (!ret) { 543 if (!ret) {
545 /* Verify that EC can process command */ 544 /* Verify that EC can process command */
546 for (i = 0; i < len; i++) { 545 for (i = 0; i < len; i++) {
547 switch (rx_buf[i]) { 546 rx_byte = rx_buf[i];
548 case EC_SPI_PAST_END: 547 if (rx_byte == EC_SPI_PAST_END ||
549 case EC_SPI_RX_BAD_DATA: 548 rx_byte == EC_SPI_RX_BAD_DATA ||
550 case EC_SPI_NOT_READY: 549 rx_byte == EC_SPI_NOT_READY) {
551 ret = -EAGAIN; 550 ret = -EREMOTEIO;
552 ec_msg->result = EC_RES_IN_PROGRESS;
553 default:
554 break; 551 break;
555 } 552 }
556 if (ret)
557 break;
558 } 553 }
559 if (!ret)
560 ret = cros_ec_spi_receive_response(ec_dev,
561 ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
562 } else {
563 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
564 } 554 }
565 555
556 if (!ret)
557 ret = cros_ec_spi_receive_response(ec_dev,
558 ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
559 else
560 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
561
566 final_ret = terminate_request(ec_dev); 562 final_ret = terminate_request(ec_dev);
567 563
568 spi_bus_unlock(ec_spi->spi->master); 564 spi_bus_unlock(ec_spi->spi->master);
@@ -667,6 +663,7 @@ static int cros_ec_spi_probe(struct spi_device *spi)
667 sizeof(struct ec_response_get_protocol_info); 663 sizeof(struct ec_response_get_protocol_info);
668 ec_dev->dout_size = sizeof(struct ec_host_request); 664 ec_dev->dout_size = sizeof(struct ec_host_request);
669 665
666 ec_spi->last_transfer_ns = ktime_get_ns();
670 667
671 err = cros_ec_register(ec_dev); 668 err = cros_ec_register(ec_dev);
672 if (err) { 669 if (err) {
diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c
index da16bf45fab4..dc94ffc6321a 100644
--- a/drivers/mfd/twl4030-audio.c
+++ b/drivers/mfd/twl4030-audio.c
@@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void)
159EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk); 159EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
160 160
161static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata, 161static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
162 struct device_node *node) 162 struct device_node *parent)
163{ 163{
164 struct device_node *node;
165
164 if (pdata && pdata->codec) 166 if (pdata && pdata->codec)
165 return true; 167 return true;
166 168
167 if (of_find_node_by_name(node, "codec")) 169 node = of_get_child_by_name(parent, "codec");
170 if (node) {
171 of_node_put(node);
168 return true; 172 return true;
173 }
169 174
170 return false; 175 return false;
171} 176}
diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c
index d66502d36ba0..dd19f17a1b63 100644
--- a/drivers/mfd/twl6040.c
+++ b/drivers/mfd/twl6040.c
@@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = {
97}; 97};
98 98
99 99
100static bool twl6040_has_vibra(struct device_node *node) 100static bool twl6040_has_vibra(struct device_node *parent)
101{ 101{
102#ifdef CONFIG_OF 102 struct device_node *node;
103 if (of_find_node_by_name(node, "vibra")) 103
104 node = of_get_child_by_name(parent, "vibra");
105 if (node) {
106 of_node_put(node);
104 return true; 107 return true;
105#endif 108 }
109
106 return false; 110 return false;
107} 111}
108 112
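Both twl4030-audio and twl6040 switch from of_find_node_by_name() to of_get_child_by_name(): the latter looks only at direct children of the given parent and hands back a reference the caller drops with of_node_put() once it only needed to know the child exists, whereas of_find_node_by_name() keeps walking the whole tree from the starting node and also puts the reference on that starting node, which these callers did not own. The shape of the fixed lookup, as used in both hunks:

	struct device_node *node;

	node = of_get_child_by_name(parent, "vibra");	/* direct child only, takes a ref */
	if (node) {
		of_node_put(node);			/* we only needed existence */
		return true;
	}

	return false;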
diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
index eda38cbe8530..41f2a9f6851d 100644
--- a/drivers/misc/pti.c
+++ b/drivers/misc/pti.c
@@ -32,7 +32,7 @@
32#include <linux/pci.h> 32#include <linux/pci.h>
33#include <linux/mutex.h> 33#include <linux/mutex.h>
34#include <linux/miscdevice.h> 34#include <linux/miscdevice.h>
35#include <linux/pti.h> 35#include <linux/intel-pti.h>
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/uaccess.h> 37#include <linux/uaccess.h>
38 38
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index f80e911b8843..73b605577447 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1114,7 +1114,7 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs,
1114 if (!ops->oobbuf) 1114 if (!ops->oobbuf)
1115 ops->ooblen = 0; 1115 ops->ooblen = 0;
1116 1116
1117 if (offs < 0 || offs + ops->len >= mtd->size) 1117 if (offs < 0 || offs + ops->len > mtd->size)
1118 return -EINVAL; 1118 return -EINVAL;
1119 1119
1120 if (ops->ooblen) { 1120 if (ops->ooblen) {
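The mtdcore bound check above drops an off-by-one: an operation of ops->len bytes that ends exactly at mtd->size is legitimate, so only offs + ops->len > mtd->size should be rejected. For instance, on a 1 MiB device (mtd->size = 0x100000) a 0x1000-byte access at offset 0xff000 ends precisely at 0x100000; the old >= test refused it even though it never runs past the device.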
diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c
index e0eb51d8c012..dd56a671ea42 100644
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c
@@ -1763,7 +1763,7 @@ try_dmaread:
1763 err = brcmstb_nand_verify_erased_page(mtd, chip, buf, 1763 err = brcmstb_nand_verify_erased_page(mtd, chip, buf,
1764 addr); 1764 addr);
1765 /* erased page bitflips corrected */ 1765 /* erased page bitflips corrected */
1766 if (err > 0) 1766 if (err >= 0)
1767 return err; 1767 return err;
1768 } 1768 }
1769 1769
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
index 484f7fbc3f7d..a8bde6665c24 100644
--- a/drivers/mtd/nand/gpio.c
+++ b/drivers/mtd/nand/gpio.c
@@ -253,9 +253,9 @@ static int gpio_nand_probe(struct platform_device *pdev)
253 goto out_ce; 253 goto out_ce;
254 } 254 }
255 255
256 gpiomtd->nwp = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW); 256 gpiomtd->ale = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
257 if (IS_ERR(gpiomtd->nwp)) { 257 if (IS_ERR(gpiomtd->ale)) {
258 ret = PTR_ERR(gpiomtd->nwp); 258 ret = PTR_ERR(gpiomtd->ale);
259 goto out_ce; 259 goto out_ce;
260 } 260 }
261 261
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 50f8d4a1b983..d4d824ef64e9 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -1067,9 +1067,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
1067 return ret; 1067 return ret;
1068 } 1068 }
1069 1069
1070 /* handle the block mark swapping */
1071 block_mark_swapping(this, payload_virt, auxiliary_virt);
1072
1073 /* Loop over status bytes, accumulating ECC status. */ 1070 /* Loop over status bytes, accumulating ECC status. */
1074 status = auxiliary_virt + nfc_geo->auxiliary_status_offset; 1071 status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
1075 1072
@@ -1158,6 +1155,9 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
1158 max_bitflips = max_t(unsigned int, max_bitflips, *status); 1155 max_bitflips = max_t(unsigned int, max_bitflips, *status);
1159 } 1156 }
1160 1157
1158 /* handle the block mark swapping */
1159 block_mark_swapping(this, buf, auxiliary_virt);
1160
1161 if (oob_required) { 1161 if (oob_required) {
1162 /* 1162 /*
1163 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob() 1163 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob()
diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h
index 3c63b16d485f..d9efbc8d783b 100644
--- a/drivers/net/ethernet/arc/emac.h
+++ b/drivers/net/ethernet/arc/emac.h
@@ -159,6 +159,8 @@ struct arc_emac_priv {
159 unsigned int link; 159 unsigned int link;
160 unsigned int duplex; 160 unsigned int duplex;
161 unsigned int speed; 161 unsigned int speed;
162
163 unsigned int rx_missed_errors;
162}; 164};
163 165
164/** 166/**
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index 3241af1ce718..bd277b0dc615 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -26,6 +26,8 @@
26 26
27#include "emac.h" 27#include "emac.h"
28 28
29static void arc_emac_restart(struct net_device *ndev);
30
29/** 31/**
30 * arc_emac_tx_avail - Return the number of available slots in the tx ring. 32 * arc_emac_tx_avail - Return the number of available slots in the tx ring.
31 * @priv: Pointer to ARC EMAC private data structure. 33 * @priv: Pointer to ARC EMAC private data structure.
@@ -210,39 +212,48 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
210 continue; 212 continue;
211 } 213 }
212 214
213 pktlen = info & LEN_MASK; 215 /* Prepare the BD for next cycle. netif_receive_skb()
214 stats->rx_packets++; 216 * only if new skb was allocated and mapped to avoid holes
215 stats->rx_bytes += pktlen; 217 * in the RX fifo.
216 skb = rx_buff->skb; 218 */
217 skb_put(skb, pktlen); 219 skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
218 skb->dev = ndev; 220 if (unlikely(!skb)) {
219 skb->protocol = eth_type_trans(skb, ndev); 221 if (net_ratelimit())
220 222 netdev_err(ndev, "cannot allocate skb\n");
221 dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr), 223 /* Return ownership to EMAC */
222 dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE); 224 rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
223
224 /* Prepare the BD for next cycle */
225 rx_buff->skb = netdev_alloc_skb_ip_align(ndev,
226 EMAC_BUFFER_SIZE);
227 if (unlikely(!rx_buff->skb)) {
228 stats->rx_errors++; 225 stats->rx_errors++;
229 /* Because receive_skb is below, increment rx_dropped */
230 stats->rx_dropped++; 226 stats->rx_dropped++;
231 continue; 227 continue;
232 } 228 }
233 229
234 /* receive_skb only if new skb was allocated to avoid holes */ 230 addr = dma_map_single(&ndev->dev, (void *)skb->data,
235 netif_receive_skb(skb);
236
237 addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data,
238 EMAC_BUFFER_SIZE, DMA_FROM_DEVICE); 231 EMAC_BUFFER_SIZE, DMA_FROM_DEVICE);
239 if (dma_mapping_error(&ndev->dev, addr)) { 232 if (dma_mapping_error(&ndev->dev, addr)) {
240 if (net_ratelimit()) 233 if (net_ratelimit())
241 netdev_err(ndev, "cannot dma map\n"); 234 netdev_err(ndev, "cannot map dma buffer\n");
242 dev_kfree_skb(rx_buff->skb); 235 dev_kfree_skb(skb);
236 /* Return ownership to EMAC */
237 rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
243 stats->rx_errors++; 238 stats->rx_errors++;
239 stats->rx_dropped++;
244 continue; 240 continue;
245 } 241 }
242
243 /* unmap previosly mapped skb */
244 dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
245 dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
246
247 pktlen = info & LEN_MASK;
248 stats->rx_packets++;
249 stats->rx_bytes += pktlen;
250 skb_put(rx_buff->skb, pktlen);
251 rx_buff->skb->dev = ndev;
252 rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev);
253
254 netif_receive_skb(rx_buff->skb);
255
256 rx_buff->skb = skb;
246 dma_unmap_addr_set(rx_buff, addr, addr); 257 dma_unmap_addr_set(rx_buff, addr, addr);
247 dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE); 258 dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE);
248 259
@@ -259,6 +270,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
259} 270}
260 271
261/** 272/**
273 * arc_emac_rx_miss_handle - handle R_MISS register
274 * @ndev: Pointer to the net_device structure.
275 */
276static void arc_emac_rx_miss_handle(struct net_device *ndev)
277{
278 struct arc_emac_priv *priv = netdev_priv(ndev);
279 struct net_device_stats *stats = &ndev->stats;
280 unsigned int miss;
281
282 miss = arc_reg_get(priv, R_MISS);
283 if (miss) {
284 stats->rx_errors += miss;
285 stats->rx_missed_errors += miss;
286 priv->rx_missed_errors += miss;
287 }
288}
289
290/**
291 * arc_emac_rx_stall_check - check RX stall
292 * @ndev: Pointer to the net_device structure.
293 * @budget: How many BDs requested to process on 1 call.
294 * @work_done: How many BDs processed
295 *
296 * Under certain conditions EMAC stop reception of incoming packets and
297 * continuously increment R_MISS register instead of saving data into
298 * provided buffer. This function detect that condition and restart
299 * EMAC.
300 */
301static void arc_emac_rx_stall_check(struct net_device *ndev,
302 int budget, unsigned int work_done)
303{
304 struct arc_emac_priv *priv = netdev_priv(ndev);
305 struct arc_emac_bd *rxbd;
306
307 if (work_done)
308 priv->rx_missed_errors = 0;
309
310 if (priv->rx_missed_errors && budget) {
311 rxbd = &priv->rxbd[priv->last_rx_bd];
312 if (le32_to_cpu(rxbd->info) & FOR_EMAC) {
313 arc_emac_restart(ndev);
314 priv->rx_missed_errors = 0;
315 }
316 }
317}
318
319/**
262 * arc_emac_poll - NAPI poll handler. 320 * arc_emac_poll - NAPI poll handler.
263 * @napi: Pointer to napi_struct structure. 321 * @napi: Pointer to napi_struct structure.
264 * @budget: How many BDs to process on 1 call. 322 * @budget: How many BDs to process on 1 call.
@@ -272,6 +330,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
272 unsigned int work_done; 330 unsigned int work_done;
273 331
274 arc_emac_tx_clean(ndev); 332 arc_emac_tx_clean(ndev);
333 arc_emac_rx_miss_handle(ndev);
275 334
276 work_done = arc_emac_rx(ndev, budget); 335 work_done = arc_emac_rx(ndev, budget);
277 if (work_done < budget) { 336 if (work_done < budget) {
@@ -279,6 +338,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
279 arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK); 338 arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK);
280 } 339 }
281 340
341 arc_emac_rx_stall_check(ndev, budget, work_done);
342
282 return work_done; 343 return work_done;
283} 344}
284 345
@@ -320,6 +381,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance)
320 if (status & MSER_MASK) { 381 if (status & MSER_MASK) {
321 stats->rx_missed_errors += 0x100; 382 stats->rx_missed_errors += 0x100;
322 stats->rx_errors += 0x100; 383 stats->rx_errors += 0x100;
384 priv->rx_missed_errors += 0x100;
385 napi_schedule(&priv->napi);
323 } 386 }
324 387
325 if (status & RXCR_MASK) { 388 if (status & RXCR_MASK) {
@@ -732,6 +795,63 @@ static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
732} 795}
733 796
734 797
798/**
799 * arc_emac_restart - Restart EMAC
800 * @ndev: Pointer to net_device structure.
801 *
802 * This function do hardware reset of EMAC in order to restore
803 * network packets reception.
804 */
805static void arc_emac_restart(struct net_device *ndev)
806{
807 struct arc_emac_priv *priv = netdev_priv(ndev);
808 struct net_device_stats *stats = &ndev->stats;
809 int i;
810
811 if (net_ratelimit())
812 netdev_warn(ndev, "restarting stalled EMAC\n");
813
814 netif_stop_queue(ndev);
815
816 /* Disable interrupts */
817 arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
818
819 /* Disable EMAC */
820 arc_reg_clr(priv, R_CTRL, EN_MASK);
821
822 /* Return the sk_buff to system */
823 arc_free_tx_queue(ndev);
824
825 /* Clean Tx BD's */
826 priv->txbd_curr = 0;
827 priv->txbd_dirty = 0;
828 memset(priv->txbd, 0, TX_RING_SZ);
829
830 for (i = 0; i < RX_BD_NUM; i++) {
831 struct arc_emac_bd *rxbd = &priv->rxbd[i];
832 unsigned int info = le32_to_cpu(rxbd->info);
833
834 if (!(info & FOR_EMAC)) {
835 stats->rx_errors++;
836 stats->rx_dropped++;
837 }
838 /* Return ownership to EMAC */
839 rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
840 }
841 priv->last_rx_bd = 0;
842
843 /* Make sure info is visible to EMAC before enable */
844 wmb();
845
846 /* Enable interrupts */
847 arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
848
849 /* Enable EMAC */
850 arc_reg_or(priv, R_CTRL, EN_MASK);
851
852 netif_start_queue(ndev);
853}
854
735static const struct net_device_ops arc_emac_netdev_ops = { 855static const struct net_device_ops arc_emac_netdev_ops = {
736 .ndo_open = arc_emac_open, 856 .ndo_open = arc_emac_open,
737 .ndo_stop = arc_emac_stop, 857 .ndo_stop = arc_emac_stop,
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index de51c2177d03..d09c5a9c53b5 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -14225,7 +14225,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
14225 /* Reset PHY, otherwise the read DMA engine will be in a mode that 14225 /* Reset PHY, otherwise the read DMA engine will be in a mode that
14226 * breaks all requests to 256 bytes. 14226 * breaks all requests to 256 bytes.
14227 */ 14227 */
14228 if (tg3_asic_rev(tp) == ASIC_REV_57766) 14228 if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
14229 tg3_asic_rev(tp) == ASIC_REV_5717 ||
14230 tg3_asic_rev(tp) == ASIC_REV_5719)
14229 reset_phy = true; 14231 reset_phy = true;
14230 14232
14231 err = tg3_restart_hw(tp, reset_phy); 14233 err = tg3_restart_hw(tp, reset_phy);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index bc93b69cfd1e..a539263cd79c 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct mvneta_port *pp)
1214 val &= ~MVNETA_GMAC0_PORT_ENABLE; 1214 val &= ~MVNETA_GMAC0_PORT_ENABLE;
1215 mvreg_write(pp, MVNETA_GMAC_CTRL_0, val); 1215 mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
1216 1216
1217 pp->link = 0;
1218 pp->duplex = -1;
1219 pp->speed = 0;
1220
1217 udelay(200); 1221 udelay(200);
1218} 1222}
1219 1223
@@ -1958,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
1958 1962
1959 if (!mvneta_rxq_desc_is_first_last(rx_status) || 1963 if (!mvneta_rxq_desc_is_first_last(rx_status) ||
1960 (rx_status & MVNETA_RXD_ERR_SUMMARY)) { 1964 (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
1965 mvneta_rx_error(pp, rx_desc);
1961err_drop_frame: 1966err_drop_frame:
1962 dev->stats.rx_errors++; 1967 dev->stats.rx_errors++;
1963 mvneta_rx_error(pp, rx_desc);
1964 /* leave the descriptor untouched */ 1968 /* leave the descriptor untouched */
1965 continue; 1969 continue;
1966 } 1970 }
@@ -3011,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp)
3011{ 3015{
3012 int queue; 3016 int queue;
3013 3017
3014 for (queue = 0; queue < txq_number; queue++) 3018 for (queue = 0; queue < rxq_number; queue++)
3015 mvneta_rxq_deinit(pp, &pp->rxqs[queue]); 3019 mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
3016} 3020}
3017 3021
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 54adfd967858..fc67e35b253e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1961,11 +1961,12 @@ static int mtk_hw_init(struct mtk_eth *eth)
1961 /* set GE2 TUNE */ 1961 /* set GE2 TUNE */
1962 regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0); 1962 regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);
1963 1963
1964 /* GE1, Force 1000M/FD, FC ON */ 1964 /* Set linkdown as the default for each GMAC. Its own MCR would be set
1965 mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0)); 1965 * up with the more appropriate value when mtk_phy_link_adjust call is
1966 1966 * being invoked.
1967 /* GE2, Force 1000M/FD, FC ON */ 1967 */
1968 mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1)); 1968 for (i = 0; i < MTK_MAC_COUNT; i++)
1969 mtk_w32(eth, 0, MTK_MAC_MCR(i));
1969 1970
1970 /* Indicates CDM to parse the MTK special tag from CPU 1971 /* Indicates CDM to parse the MTK special tag from CPU
1971 * which also is working out for untag packets. 1972 * which also is working out for untag packets.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 1fffdebbc9e8..e9a1fbcc4adf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
362 case MLX5_CMD_OP_QUERY_VPORT_COUNTER: 362 case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
363 case MLX5_CMD_OP_ALLOC_Q_COUNTER: 363 case MLX5_CMD_OP_ALLOC_Q_COUNTER:
364 case MLX5_CMD_OP_QUERY_Q_COUNTER: 364 case MLX5_CMD_OP_QUERY_Q_COUNTER:
365 case MLX5_CMD_OP_SET_RATE_LIMIT: 365 case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
366 case MLX5_CMD_OP_QUERY_RATE_LIMIT: 366 case MLX5_CMD_OP_QUERY_RATE_LIMIT:
367 case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: 367 case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
368 case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: 368 case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
@@ -505,7 +505,7 @@ const char *mlx5_command_str(int command)
505 MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); 505 MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
506 MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); 506 MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
507 MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); 507 MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
508 MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); 508 MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
509 MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); 509 MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
510 MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); 510 MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
511 MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); 511 MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c0872b3284cb..543060c305a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -82,6 +82,9 @@
82 max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req) 82 max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
83#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6) 83#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
84#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8) 84#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
85#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
86 (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
87 MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
85 88
86#define MLX5_MPWRQ_LOG_WQE_SZ 18 89#define MLX5_MPWRQ_LOG_WQE_SZ 18
87#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ 90#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
@@ -590,6 +593,7 @@ struct mlx5e_channel {
590 struct mlx5_core_dev *mdev; 593 struct mlx5_core_dev *mdev;
591 struct hwtstamp_config *tstamp; 594 struct hwtstamp_config *tstamp;
592 int ix; 595 int ix;
596 int cpu;
593}; 597};
594 598
595struct mlx5e_channels { 599struct mlx5e_channels {
@@ -935,8 +939,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
935 u8 cq_period_mode); 939 u8 cq_period_mode);
936void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, 940void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
937 u8 cq_period_mode); 941 u8 cq_period_mode);
938void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, 942void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
939 struct mlx5e_params *params, u8 rq_type); 943 struct mlx5e_params *params,
944 u8 rq_type);
940 945
941static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) 946static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
942{ 947{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index c6d90b6dd80e..9bcf38f4123b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -274,6 +274,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
274static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, 274static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
275 struct ieee_ets *ets) 275 struct ieee_ets *ets)
276{ 276{
277 bool have_ets_tc = false;
277 int bw_sum = 0; 278 int bw_sum = 0;
278 int i; 279 int i;
279 280
@@ -288,11 +289,14 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
288 } 289 }
289 290
290 /* Validate Bandwidth Sum */ 291 /* Validate Bandwidth Sum */
291 for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) 292 for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
292 if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) 293 if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
294 have_ets_tc = true;
293 bw_sum += ets->tc_tx_bw[i]; 295 bw_sum += ets->tc_tx_bw[i];
296 }
297 }
294 298
295 if (bw_sum != 0 && bw_sum != 100) { 299 if (have_ets_tc && bw_sum != 100) {
296 netdev_err(netdev, 300 netdev_err(netdev,
297 "Failed to validate ETS: BW sum is illegal\n"); 301 "Failed to validate ETS: BW sum is illegal\n");
298 return -EINVAL; 302 return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 23425f028405..8f05efa5c829 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1523,8 +1523,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
1523 new_channels.params = priv->channels.params; 1523 new_channels.params = priv->channels.params;
1524 MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); 1524 MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
1525 1525
1526 mlx5e_set_rq_type_params(priv->mdev, &new_channels.params, 1526 new_channels.params.mpwqe_log_stride_sz =
1527 new_channels.params.rq_wq_type); 1527 MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val);
1528 new_channels.params.mpwqe_log_num_strides =
1529 MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz;
1528 1530
1529 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { 1531 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
1530 priv->channels.params = new_channels.params; 1532 priv->channels.params = new_channels.params;
@@ -1536,6 +1538,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
1536 return err; 1538 return err;
1537 1539
1538 mlx5e_switch_priv_channels(priv, &new_channels, NULL); 1540 mlx5e_switch_priv_channels(priv, &new_channels, NULL);
1541 mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
1542 MLX5E_GET_PFLAG(&priv->channels.params,
1543 MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
1544
1539 return 0; 1545 return 0;
1540} 1546}
1541 1547
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0f5c012de52e..3aa1c90e7c86 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -71,11 +71,6 @@ struct mlx5e_channel_param {
71 struct mlx5e_cq_param icosq_cq; 71 struct mlx5e_cq_param icosq_cq;
72}; 72};
73 73
74static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
75{
76 return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
77}
78
79static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) 74static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
80{ 75{
81 return MLX5_CAP_GEN(mdev, striding_rq) && 76 return MLX5_CAP_GEN(mdev, striding_rq) &&
@@ -83,8 +78,8 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
83 MLX5_CAP_ETH(mdev, reg_umr_sq); 78 MLX5_CAP_ETH(mdev, reg_umr_sq);
84} 79}
85 80
86void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, 81void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
87 struct mlx5e_params *params, u8 rq_type) 82 struct mlx5e_params *params, u8 rq_type)
88{ 83{
89 params->rq_wq_type = rq_type; 84 params->rq_wq_type = rq_type;
90 params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; 85 params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@ -93,10 +88,8 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
93 params->log_rq_size = is_kdump_kernel() ? 88 params->log_rq_size = is_kdump_kernel() ?
94 MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW : 89 MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
95 MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; 90 MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
96 params->mpwqe_log_stride_sz = 91 params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
97 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ? 92 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
98 MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) :
99 MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
100 params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - 93 params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
101 params->mpwqe_log_stride_sz; 94 params->mpwqe_log_stride_sz;
102 break; 95 break;
@@ -120,13 +113,14 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
120 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); 113 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
121} 114}
122 115
123static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) 116static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
117 struct mlx5e_params *params)
124{ 118{
125 u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && 119 u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
126 !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? 120 !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
127 MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : 121 MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
128 MLX5_WQ_TYPE_LINKED_LIST; 122 MLX5_WQ_TYPE_LINKED_LIST;
129 mlx5e_set_rq_type_params(mdev, params, rq_type); 123 mlx5e_init_rq_type_params(mdev, params, rq_type);
130} 124}
131 125
132static void mlx5e_update_carrier(struct mlx5e_priv *priv) 126static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@ -444,17 +438,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
444 int wq_sz = mlx5_wq_ll_get_size(&rq->wq); 438 int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
445 int mtt_sz = mlx5e_get_wqe_mtt_sz(); 439 int mtt_sz = mlx5e_get_wqe_mtt_sz();
446 int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; 440 int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
447 int node = mlx5e_get_node(c->priv, c->ix);
448 int i; 441 int i;
449 442
450 rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), 443 rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
451 GFP_KERNEL, node); 444 GFP_KERNEL, cpu_to_node(c->cpu));
452 if (!rq->mpwqe.info) 445 if (!rq->mpwqe.info)
453 goto err_out; 446 goto err_out;
454 447
455 /* We allocate more than mtt_sz as we will align the pointer */ 448 /* We allocate more than mtt_sz as we will align the pointer */
456 rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, 449 rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
457 GFP_KERNEL, node); 450 cpu_to_node(c->cpu));
458 if (unlikely(!rq->mpwqe.mtt_no_align)) 451 if (unlikely(!rq->mpwqe.mtt_no_align))
459 goto err_free_wqe_info; 452 goto err_free_wqe_info;
460 453
@@ -562,7 +555,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
562 int err; 555 int err;
563 int i; 556 int i;
564 557
565 rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 558 rqp->wq.db_numa_node = cpu_to_node(c->cpu);
566 559
567 err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq, 560 err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
568 &rq->wq_ctrl); 561 &rq->wq_ctrl);
@@ -629,8 +622,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
629 default: /* MLX5_WQ_TYPE_LINKED_LIST */ 622 default: /* MLX5_WQ_TYPE_LINKED_LIST */
630 rq->wqe.frag_info = 623 rq->wqe.frag_info =
631 kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info), 624 kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
632 GFP_KERNEL, 625 GFP_KERNEL, cpu_to_node(c->cpu));
633 mlx5e_get_node(c->priv, c->ix));
634 if (!rq->wqe.frag_info) { 626 if (!rq->wqe.frag_info) {
635 err = -ENOMEM; 627 err = -ENOMEM;
636 goto err_rq_wq_destroy; 628 goto err_rq_wq_destroy;
@@ -1000,13 +992,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
1000 sq->uar_map = mdev->mlx5e_res.bfreg.map; 992 sq->uar_map = mdev->mlx5e_res.bfreg.map;
1001 sq->min_inline_mode = params->tx_min_inline_mode; 993 sq->min_inline_mode = params->tx_min_inline_mode;
1002 994
1003 param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 995 param->wq.db_numa_node = cpu_to_node(c->cpu);
1004 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); 996 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
1005 if (err) 997 if (err)
1006 return err; 998 return err;
1007 sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; 999 sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
1008 1000
1009 err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix)); 1001 err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
1010 if (err) 1002 if (err)
1011 goto err_sq_wq_destroy; 1003 goto err_sq_wq_destroy;
1012 1004
@@ -1053,13 +1045,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
1053 sq->channel = c; 1045 sq->channel = c;
1054 sq->uar_map = mdev->mlx5e_res.bfreg.map; 1046 sq->uar_map = mdev->mlx5e_res.bfreg.map;
1055 1047
1056 param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 1048 param->wq.db_numa_node = cpu_to_node(c->cpu);
1057 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); 1049 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
1058 if (err) 1050 if (err)
1059 return err; 1051 return err;
1060 sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; 1052 sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
1061 1053
1062 err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix)); 1054 err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
1063 if (err) 1055 if (err)
1064 goto err_sq_wq_destroy; 1056 goto err_sq_wq_destroy;
1065 1057
@@ -1126,13 +1118,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
1126 if (MLX5_IPSEC_DEV(c->priv->mdev)) 1118 if (MLX5_IPSEC_DEV(c->priv->mdev))
1127 set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); 1119 set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
1128 1120
1129 param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 1121 param->wq.db_numa_node = cpu_to_node(c->cpu);
1130 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); 1122 err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
1131 if (err) 1123 if (err)
1132 return err; 1124 return err;
1133 sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; 1125 sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
1134 1126
1135 err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix)); 1127 err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
1136 if (err) 1128 if (err)
1137 goto err_sq_wq_destroy; 1129 goto err_sq_wq_destroy;
1138 1130
@@ -1504,8 +1496,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
1504 struct mlx5_core_dev *mdev = c->priv->mdev; 1496 struct mlx5_core_dev *mdev = c->priv->mdev;
1505 int err; 1497 int err;
1506 1498
1507 param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix); 1499 param->wq.buf_numa_node = cpu_to_node(c->cpu);
1508 param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); 1500 param->wq.db_numa_node = cpu_to_node(c->cpu);
1509 param->eq_ix = c->ix; 1501 param->eq_ix = c->ix;
1510 1502
1511 err = mlx5e_alloc_cq_common(mdev, param, cq); 1503 err = mlx5e_alloc_cq_common(mdev, param, cq);
@@ -1604,6 +1596,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
1604 mlx5e_free_cq(cq); 1596 mlx5e_free_cq(cq);
1605} 1597}
1606 1598
1599static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
1600{
1601 return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
1602}
1603
1607static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, 1604static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1608 struct mlx5e_params *params, 1605 struct mlx5e_params *params,
1609 struct mlx5e_channel_param *cparam) 1606 struct mlx5e_channel_param *cparam)
@@ -1752,12 +1749,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1752{ 1749{
1753 struct mlx5e_cq_moder icocq_moder = {0, 0}; 1750 struct mlx5e_cq_moder icocq_moder = {0, 0};
1754 struct net_device *netdev = priv->netdev; 1751 struct net_device *netdev = priv->netdev;
1752 int cpu = mlx5e_get_cpu(priv, ix);
1755 struct mlx5e_channel *c; 1753 struct mlx5e_channel *c;
1756 unsigned int irq; 1754 unsigned int irq;
1757 int err; 1755 int err;
1758 int eqn; 1756 int eqn;
1759 1757
1760 c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix)); 1758 c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
1761 if (!c) 1759 if (!c)
1762 return -ENOMEM; 1760 return -ENOMEM;
1763 1761
@@ -1765,6 +1763,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1765 c->mdev = priv->mdev; 1763 c->mdev = priv->mdev;
1766 c->tstamp = &priv->tstamp; 1764 c->tstamp = &priv->tstamp;
1767 c->ix = ix; 1765 c->ix = ix;
1766 c->cpu = cpu;
1768 c->pdev = &priv->mdev->pdev->dev; 1767 c->pdev = &priv->mdev->pdev->dev;
1769 c->netdev = priv->netdev; 1768 c->netdev = priv->netdev;
1770 c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); 1769 c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
@@ -1853,8 +1852,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
1853 for (tc = 0; tc < c->num_tc; tc++) 1852 for (tc = 0; tc < c->num_tc; tc++)
1854 mlx5e_activate_txqsq(&c->sq[tc]); 1853 mlx5e_activate_txqsq(&c->sq[tc]);
1855 mlx5e_activate_rq(&c->rq); 1854 mlx5e_activate_rq(&c->rq);
1856 netif_set_xps_queue(c->netdev, 1855 netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
1857 mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
1858} 1856}
1859 1857
1860static void mlx5e_deactivate_channel(struct mlx5e_channel *c) 1858static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@ -3679,6 +3677,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
3679 struct sk_buff *skb, 3677 struct sk_buff *skb,
3680 netdev_features_t features) 3678 netdev_features_t features)
3681{ 3679{
3680 unsigned int offset = 0;
3682 struct udphdr *udph; 3681 struct udphdr *udph;
3683 u8 proto; 3682 u8 proto;
3684 u16 port; 3683 u16 port;
@@ -3688,7 +3687,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
3688 proto = ip_hdr(skb)->protocol; 3687 proto = ip_hdr(skb)->protocol;
3689 break; 3688 break;
3690 case htons(ETH_P_IPV6): 3689 case htons(ETH_P_IPV6):
3691 proto = ipv6_hdr(skb)->nexthdr; 3690 proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
3692 break; 3691 break;
3693 default: 3692 default:
3694 goto out; 3693 goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 60771865c99c..e7e7cef2bde4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -466,7 +466,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
466 break; 466 break;
467 case MLX5_EVENT_TYPE_CQ_ERROR: 467 case MLX5_EVENT_TYPE_CQ_ERROR:
468 cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; 468 cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
469 mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n", 469 mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
470 cqn, eqe->data.cq_err.syndrome); 470 cqn, eqe->data.cq_err.syndrome);
471 mlx5_cq_event(dev, cqn, eqe->type); 471 mlx5_cq_event(dev, cqn, eqe->type);
472 break; 472 break;
@@ -775,7 +775,7 @@ err1:
775 return err; 775 return err;
776} 776}
777 777
778int mlx5_stop_eqs(struct mlx5_core_dev *dev) 778void mlx5_stop_eqs(struct mlx5_core_dev *dev)
779{ 779{
780 struct mlx5_eq_table *table = &dev->priv.eq_table; 780 struct mlx5_eq_table *table = &dev->priv.eq_table;
781 int err; 781 int err;
@@ -784,22 +784,26 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
784 if (MLX5_CAP_GEN(dev, pg)) { 784 if (MLX5_CAP_GEN(dev, pg)) {
785 err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq); 785 err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
786 if (err) 786 if (err)
787 return err; 787 mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
788 err);
788 } 789 }
789#endif 790#endif
790 791
791 err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); 792 err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
792 if (err) 793 if (err)
793 return err; 794 mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
795 err);
794 796
795 mlx5_destroy_unmap_eq(dev, &table->async_eq); 797 err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
798 if (err)
799 mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
800 err);
796 mlx5_cmd_use_polling(dev); 801 mlx5_cmd_use_polling(dev);
797 802
798 err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); 803 err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
799 if (err) 804 if (err)
800 mlx5_cmd_use_events(dev); 805 mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
801 806 err);
802 return err;
803} 807}
804 808
805int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, 809int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
index 3c11d6e2160a..14962969c5ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
@@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
66 u8 actual_size; 66 u8 actual_size;
67 int err; 67 int err;
68 68
69 if (!size)
70 return -EINVAL;
71
69 if (!fdev->mdev) 72 if (!fdev->mdev)
70 return -ENOTCONN; 73 return -ENOTCONN;
71 74
@@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
95 u8 actual_size; 98 u8 actual_size;
96 int err; 99 int err;
97 100
101 if (!size)
102 return -EINVAL;
103
98 if (!fdev->mdev) 104 if (!fdev->mdev)
99 return -ENOTCONN; 105 return -ENOTCONN;
100 106
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index c70fd663a633..dfaad9ecb2b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -174,6 +174,8 @@ static void del_hw_fte(struct fs_node *node);
174static void del_sw_flow_table(struct fs_node *node); 174static void del_sw_flow_table(struct fs_node *node);
175static void del_sw_flow_group(struct fs_node *node); 175static void del_sw_flow_group(struct fs_node *node);
176static void del_sw_fte(struct fs_node *node); 176static void del_sw_fte(struct fs_node *node);
177static void del_sw_prio(struct fs_node *node);
178static void del_sw_ns(struct fs_node *node);
177/* Delete rule (destination) is special case that 179/* Delete rule (destination) is special case that
178 * requires to lock the FTE for all the deletion process. 180 * requires to lock the FTE for all the deletion process.
179 */ 181 */
@@ -408,6 +410,16 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
408 return NULL; 410 return NULL;
409} 411}
410 412
413static void del_sw_ns(struct fs_node *node)
414{
415 kfree(node);
416}
417
418static void del_sw_prio(struct fs_node *node)
419{
420 kfree(node);
421}
422
411static void del_hw_flow_table(struct fs_node *node) 423static void del_hw_flow_table(struct fs_node *node)
412{ 424{
413 struct mlx5_flow_table *ft; 425 struct mlx5_flow_table *ft;
@@ -2064,7 +2076,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
2064 return ERR_PTR(-ENOMEM); 2076 return ERR_PTR(-ENOMEM);
2065 2077
2066 fs_prio->node.type = FS_TYPE_PRIO; 2078 fs_prio->node.type = FS_TYPE_PRIO;
2067 tree_init_node(&fs_prio->node, NULL, NULL); 2079 tree_init_node(&fs_prio->node, NULL, del_sw_prio);
2068 tree_add_node(&fs_prio->node, &ns->node); 2080 tree_add_node(&fs_prio->node, &ns->node);
2069 fs_prio->num_levels = num_levels; 2081 fs_prio->num_levels = num_levels;
2070 fs_prio->prio = prio; 2082 fs_prio->prio = prio;
@@ -2090,7 +2102,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
2090 return ERR_PTR(-ENOMEM); 2102 return ERR_PTR(-ENOMEM);
2091 2103
2092 fs_init_namespace(ns); 2104 fs_init_namespace(ns);
2093 tree_init_node(&ns->node, NULL, NULL); 2105 tree_init_node(&ns->node, NULL, del_sw_ns);
2094 tree_add_node(&ns->node, &prio->node); 2106 tree_add_node(&ns->node, &prio->node);
2095 list_add_tail(&ns->node.list, &prio->node.children); 2107 list_add_tail(&ns->node.list, &prio->node.children);
2096 2108
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 1a0e797ad001..21d29f7936f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -241,7 +241,7 @@ static void print_health_info(struct mlx5_core_dev *dev)
241 u32 fw; 241 u32 fw;
242 int i; 242 int i;
243 243
244 /* If the syndrom is 0, the device is OK and no need to print buffer */ 244 /* If the syndrome is 0, the device is OK and no need to print buffer */
245 if (!ioread8(&h->synd)) 245 if (!ioread8(&h->synd))
246 return; 246 return;
247 247
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index d2a66dc4adc6..8812d7208e8f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -57,7 +57,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
57 struct mlx5e_params *params) 57 struct mlx5e_params *params)
58{ 58{
59 /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ 59 /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
60 mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST); 60 mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
61 61
62 /* RQ size in ipoib by default is 512 */ 62 /* RQ size in ipoib by default is 512 */
63 params->log_rq_size = is_kdump_kernel() ? 63 params->log_rq_size = is_kdump_kernel() ?
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 5f323442cc5a..8a89c7e8cd63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
317{ 317{
318 struct mlx5_priv *priv = &dev->priv; 318 struct mlx5_priv *priv = &dev->priv;
319 struct mlx5_eq_table *table = &priv->eq_table; 319 struct mlx5_eq_table *table = &priv->eq_table;
320 struct irq_affinity irqdesc = {
321 .pre_vectors = MLX5_EQ_VEC_COMP_BASE,
322 };
323 int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); 320 int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
324 int nvec; 321 int nvec;
325 322
@@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
333 if (!priv->irq_info) 330 if (!priv->irq_info)
334 goto err_free_msix; 331 goto err_free_msix;
335 332
336 nvec = pci_alloc_irq_vectors_affinity(dev->pdev, 333 nvec = pci_alloc_irq_vectors(dev->pdev,
337 MLX5_EQ_VEC_COMP_BASE + 1, nvec, 334 MLX5_EQ_VEC_COMP_BASE + 1, nvec,
338 PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, 335 PCI_IRQ_MSIX);
339 &irqdesc);
340 if (nvec < 0) 336 if (nvec < 0)
341 return nvec; 337 return nvec;
342 338
@@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
622 return (u64)timer_l | (u64)timer_h1 << 32; 618 return (u64)timer_l | (u64)timer_h1 << 32;
623} 619}
624 620
621static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
622{
623 struct mlx5_priv *priv = &mdev->priv;
624 int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
625
626 if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
627 mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
628 return -ENOMEM;
629 }
630
631 cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
632 priv->irq_info[i].mask);
633
634 if (IS_ENABLED(CONFIG_SMP) &&
635 irq_set_affinity_hint(irq, priv->irq_info[i].mask))
636 mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
637
638 return 0;
639}
640
641static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
642{
643 struct mlx5_priv *priv = &mdev->priv;
644 int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
645
646 irq_set_affinity_hint(irq, NULL);
647 free_cpumask_var(priv->irq_info[i].mask);
648}
649
650static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
651{
652 int err;
653 int i;
654
655 for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
656 err = mlx5_irq_set_affinity_hint(mdev, i);
657 if (err)
658 goto err_out;
659 }
660
661 return 0;
662
663err_out:
664 for (i--; i >= 0; i--)
665 mlx5_irq_clear_affinity_hint(mdev, i);
666
667 return err;
668}
669
670static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
671{
672 int i;
673
674 for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
675 mlx5_irq_clear_affinity_hint(mdev, i);
676}
677
625int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, 678int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
626 unsigned int *irqn) 679 unsigned int *irqn)
627{ 680{
@@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
1097 goto err_stop_eqs; 1150 goto err_stop_eqs;
1098 } 1151 }
1099 1152
1153 err = mlx5_irq_set_affinity_hints(dev);
1154 if (err) {
1155 dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
1156 goto err_affinity_hints;
1157 }
1158
1100 err = mlx5_init_fs(dev); 1159 err = mlx5_init_fs(dev);
1101 if (err) { 1160 if (err) {
1102 dev_err(&pdev->dev, "Failed to init flow steering\n"); 1161 dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1154,6 +1213,9 @@ err_sriov:
1154 mlx5_cleanup_fs(dev); 1213 mlx5_cleanup_fs(dev);
1155 1214
1156err_fs: 1215err_fs:
1216 mlx5_irq_clear_affinity_hints(dev);
1217
1218err_affinity_hints:
1157 free_comp_eqs(dev); 1219 free_comp_eqs(dev);
1158 1220
1159err_stop_eqs: 1221err_stop_eqs:
@@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
1222 1284
1223 mlx5_sriov_detach(dev); 1285 mlx5_sriov_detach(dev);
1224 mlx5_cleanup_fs(dev); 1286 mlx5_cleanup_fs(dev);
1287 mlx5_irq_clear_affinity_hints(dev);
1225 free_comp_eqs(dev); 1288 free_comp_eqs(dev);
1226 mlx5_stop_eqs(dev); 1289 mlx5_stop_eqs(dev);
1227 mlx5_put_uars_page(dev, priv->uar); 1290 mlx5_put_uars_page(dev, priv->uar);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index db9e665ab104..889130edb715 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -213,8 +213,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
213err_cmd: 213err_cmd:
214 memset(din, 0, sizeof(din)); 214 memset(din, 0, sizeof(din));
215 memset(dout, 0, sizeof(dout)); 215 memset(dout, 0, sizeof(dout));
216 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); 216 MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
217 MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); 217 MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
218 mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); 218 mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
219 return err; 219 return err;
220} 220}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index e651e4c02867..d3c33e9eea72 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -125,16 +125,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
125 return ret_entry; 125 return ret_entry;
126} 126}
127 127
128static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev, 128static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
129 u32 rate, u16 index) 129 u32 rate, u16 index)
130{ 130{
131 u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0}; 131 u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0};
132 u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0}; 132 u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
133 133
134 MLX5_SET(set_rate_limit_in, in, opcode, 134 MLX5_SET(set_pp_rate_limit_in, in, opcode,
135 MLX5_CMD_OP_SET_RATE_LIMIT); 135 MLX5_CMD_OP_SET_PP_RATE_LIMIT);
136 MLX5_SET(set_rate_limit_in, in, rate_limit_index, index); 136 MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
137 MLX5_SET(set_rate_limit_in, in, rate_limit, rate); 137 MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate);
138 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 138 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
139} 139}
140 140
@@ -173,7 +173,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
173 entry->refcount++; 173 entry->refcount++;
174 } else { 174 } else {
175 /* new rate limit */ 175 /* new rate limit */
176 err = mlx5_set_rate_limit_cmd(dev, rate, entry->index); 176 err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index);
177 if (err) { 177 if (err) {
178 mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n", 178 mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
179 rate, err); 179 rate, err);
@@ -209,7 +209,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate)
209 entry->refcount--; 209 entry->refcount--;
210 if (!entry->refcount) { 210 if (!entry->refcount) {
211 /* need to remove rate */ 211 /* need to remove rate */
212 mlx5_set_rate_limit_cmd(dev, 0, entry->index); 212 mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index);
213 entry->rate = 0; 213 entry->rate = 0;
214 } 214 }
215 215
@@ -262,8 +262,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
262 /* Clear all configured rates */ 262 /* Clear all configured rates */
263 for (i = 0; i < table->max_size; i++) 263 for (i = 0; i < table->max_size; i++)
264 if (table->rl_entry[i].rate) 264 if (table->rl_entry[i].rate)
265 mlx5_set_rate_limit_cmd(dev, 0, 265 mlx5_set_pp_rate_limit_cmd(dev, 0,
266 table->rl_entry[i].index); 266 table->rl_entry[i].index);
267 267
268 kfree(dev->priv.rl_table.rl_entry); 268 kfree(dev->priv.rl_table.rl_entry);
269} 269}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index 07a9ba6cfc70..2f74953e4561 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port)
71 struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; 71 struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
72 struct mlx5e_vxlan *vxlan; 72 struct mlx5e_vxlan *vxlan;
73 73
74 spin_lock(&vxlan_db->lock); 74 spin_lock_bh(&vxlan_db->lock);
75 vxlan = radix_tree_lookup(&vxlan_db->tree, port); 75 vxlan = radix_tree_lookup(&vxlan_db->tree, port);
76 spin_unlock(&vxlan_db->lock); 76 spin_unlock_bh(&vxlan_db->lock);
77 77
78 return vxlan; 78 return vxlan;
79} 79}
@@ -88,8 +88,12 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
88 struct mlx5e_vxlan *vxlan; 88 struct mlx5e_vxlan *vxlan;
89 int err; 89 int err;
90 90
91 if (mlx5e_vxlan_lookup_port(priv, port)) 91 mutex_lock(&priv->state_lock);
92 vxlan = mlx5e_vxlan_lookup_port(priv, port);
93 if (vxlan) {
94 atomic_inc(&vxlan->refcount);
92 goto free_work; 95 goto free_work;
96 }
93 97
94 if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port)) 98 if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
95 goto free_work; 99 goto free_work;
@@ -99,10 +103,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
99 goto err_delete_port; 103 goto err_delete_port;
100 104
101 vxlan->udp_port = port; 105 vxlan->udp_port = port;
106 atomic_set(&vxlan->refcount, 1);
102 107
103 spin_lock_irq(&vxlan_db->lock); 108 spin_lock_bh(&vxlan_db->lock);
104 err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan); 109 err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
105 spin_unlock_irq(&vxlan_db->lock); 110 spin_unlock_bh(&vxlan_db->lock);
106 if (err) 111 if (err)
107 goto err_free; 112 goto err_free;
108 113
@@ -113,35 +118,39 @@ err_free:
113err_delete_port: 118err_delete_port:
114 mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); 119 mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
115free_work: 120free_work:
121 mutex_unlock(&priv->state_lock);
116 kfree(vxlan_work); 122 kfree(vxlan_work);
117} 123}
118 124
119static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port) 125static void mlx5e_vxlan_del_port(struct work_struct *work)
120{ 126{
127 struct mlx5e_vxlan_work *vxlan_work =
128 container_of(work, struct mlx5e_vxlan_work, work);
129 struct mlx5e_priv *priv = vxlan_work->priv;
121 struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; 130 struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
131 u16 port = vxlan_work->port;
122 struct mlx5e_vxlan *vxlan; 132 struct mlx5e_vxlan *vxlan;
133 bool remove = false;
123 134
124 spin_lock_irq(&vxlan_db->lock); 135 mutex_lock(&priv->state_lock);
125 vxlan = radix_tree_delete(&vxlan_db->tree, port); 136 spin_lock_bh(&vxlan_db->lock);
126 spin_unlock_irq(&vxlan_db->lock); 137 vxlan = radix_tree_lookup(&vxlan_db->tree, port);
127
128 if (!vxlan) 138 if (!vxlan)
129 return; 139 goto out_unlock;
130
131 mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port);
132
133 kfree(vxlan);
134}
135 140
136static void mlx5e_vxlan_del_port(struct work_struct *work) 141 if (atomic_dec_and_test(&vxlan->refcount)) {
137{ 142 radix_tree_delete(&vxlan_db->tree, port);
138 struct mlx5e_vxlan_work *vxlan_work = 143 remove = true;
139 container_of(work, struct mlx5e_vxlan_work, work); 144 }
140 struct mlx5e_priv *priv = vxlan_work->priv;
141 u16 port = vxlan_work->port;
142 145
143 __mlx5e_vxlan_core_del_port(priv, port); 146out_unlock:
147 spin_unlock_bh(&vxlan_db->lock);
144 148
149 if (remove) {
150 mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
151 kfree(vxlan);
152 }
153 mutex_unlock(&priv->state_lock);
145 kfree(vxlan_work); 154 kfree(vxlan_work);
146} 155}
147 156
@@ -171,12 +180,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv)
171 struct mlx5e_vxlan *vxlan; 180 struct mlx5e_vxlan *vxlan;
172 unsigned int port = 0; 181 unsigned int port = 0;
173 182
174 spin_lock_irq(&vxlan_db->lock); 183 /* Lockless since we are the only radix-tree consumers, wq is disabled */
175 while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) { 184 while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
176 port = vxlan->udp_port; 185 port = vxlan->udp_port;
177 spin_unlock_irq(&vxlan_db->lock); 186 radix_tree_delete(&vxlan_db->tree, port);
178 __mlx5e_vxlan_core_del_port(priv, (u16)port); 187 mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
179 spin_lock_irq(&vxlan_db->lock); 188 kfree(vxlan);
180 } 189 }
181 spin_unlock_irq(&vxlan_db->lock);
182} 190}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
index 5def12c048e3..5ef6ae7d568a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
@@ -36,6 +36,7 @@
36#include "en.h" 36#include "en.h"
37 37
38struct mlx5e_vxlan { 38struct mlx5e_vxlan {
39 atomic_t refcount;
39 u16 udp_port; 40 u16 udp_port;
40}; 41};
41 42
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 72ef4f8025f0..be657b8533f0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2436,25 +2436,16 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2436 rhashtable_destroy(&mlxsw_sp->router->neigh_ht); 2436 rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2437} 2437}
2438 2438
2439static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
2440 const struct mlxsw_sp_rif *rif)
2441{
2442 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2443
2444 mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
2445 rif->rif_index, rif->addr);
2446 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2447}
2448
2449static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, 2439static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2450 struct mlxsw_sp_rif *rif) 2440 struct mlxsw_sp_rif *rif)
2451{ 2441{
2452 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; 2442 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2453 2443
2454 mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
2455 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, 2444 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2456 rif_list_node) 2445 rif_list_node) {
2446 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2457 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); 2447 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2448 }
2458} 2449}
2459 2450
2460enum mlxsw_sp_nexthop_type { 2451enum mlxsw_sp_nexthop_type {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 4f6553f01178..214b02a3acdd 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -89,6 +89,9 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
89{ 89{
90 struct tc_cls_bpf_offload *cls_bpf = type_data; 90 struct tc_cls_bpf_offload *cls_bpf = type_data;
91 struct nfp_net *nn = cb_priv; 91 struct nfp_net *nn = cb_priv;
92 struct bpf_prog *oldprog;
93 struct nfp_bpf_vnic *bv;
94 int err;
92 95
93 if (type != TC_SETUP_CLSBPF || 96 if (type != TC_SETUP_CLSBPF ||
94 !tc_can_offload(nn->dp.netdev) || 97 !tc_can_offload(nn->dp.netdev) ||
@@ -96,8 +99,6 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
96 cls_bpf->common.protocol != htons(ETH_P_ALL) || 99 cls_bpf->common.protocol != htons(ETH_P_ALL) ||
97 cls_bpf->common.chain_index) 100 cls_bpf->common.chain_index)
98 return -EOPNOTSUPP; 101 return -EOPNOTSUPP;
99 if (nn->dp.bpf_offload_xdp)
100 return -EBUSY;
101 102
102 /* Only support TC direct action */ 103 /* Only support TC direct action */
103 if (!cls_bpf->exts_integrated || 104 if (!cls_bpf->exts_integrated ||
@@ -106,16 +107,25 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
106 return -EOPNOTSUPP; 107 return -EOPNOTSUPP;
107 } 108 }
108 109
109 switch (cls_bpf->command) { 110 if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
110 case TC_CLSBPF_REPLACE:
111 return nfp_net_bpf_offload(nn, cls_bpf->prog, true);
112 case TC_CLSBPF_ADD:
113 return nfp_net_bpf_offload(nn, cls_bpf->prog, false);
114 case TC_CLSBPF_DESTROY:
115 return nfp_net_bpf_offload(nn, NULL, true);
116 default:
117 return -EOPNOTSUPP; 111 return -EOPNOTSUPP;
112
113 bv = nn->app_priv;
114 oldprog = cls_bpf->oldprog;
115
116 /* Don't remove if oldprog doesn't match driver's state */
117 if (bv->tc_prog != oldprog) {
118 oldprog = NULL;
119 if (!cls_bpf->prog)
120 return 0;
118 } 121 }
122
123 err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog);
124 if (err)
125 return err;
126
127 bv->tc_prog = cls_bpf->prog;
128 return 0;
119} 129}
120 130
121static int nfp_bpf_setup_tc_block(struct net_device *netdev, 131static int nfp_bpf_setup_tc_block(struct net_device *netdev,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index f49669bf6b44..aae1be9ed056 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -228,6 +228,14 @@ struct nfp_prog {
228 struct list_head insns; 228 struct list_head insns;
229}; 229};
230 230
231/**
232 * struct nfp_bpf_vnic - per-vNIC BPF priv structure
233 * @tc_prog: currently loaded cls_bpf program
234 */
235struct nfp_bpf_vnic {
236 struct bpf_prog *tc_prog;
237};
238
231int nfp_bpf_jit(struct nfp_prog *prog); 239int nfp_bpf_jit(struct nfp_prog *prog);
232 240
233extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops; 241extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 70c92b649b29..38c924bdd32e 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -253,18 +253,18 @@ static int emac_open(struct net_device *netdev)
253 return ret; 253 return ret;
254 } 254 }
255 255
256 ret = emac_mac_up(adpt); 256 ret = adpt->phy.open(adpt);
257 if (ret) { 257 if (ret) {
258 emac_mac_rx_tx_rings_free_all(adpt); 258 emac_mac_rx_tx_rings_free_all(adpt);
259 free_irq(irq->irq, irq); 259 free_irq(irq->irq, irq);
260 return ret; 260 return ret;
261 } 261 }
262 262
263 ret = adpt->phy.open(adpt); 263 ret = emac_mac_up(adpt);
264 if (ret) { 264 if (ret) {
265 emac_mac_down(adpt);
266 emac_mac_rx_tx_rings_free_all(adpt); 265 emac_mac_rx_tx_rings_free_all(adpt);
267 free_irq(irq->irq, irq); 266 free_irq(irq->irq, irq);
267 adpt->phy.close(adpt);
268 return ret; 268 return ret;
269 } 269 }
270 270
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index e1e5ac053760..ce2ea2d491ac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -409,7 +409,7 @@ struct stmmac_desc_ops {
409 /* get timestamp value */ 409 /* get timestamp value */
410 u64(*get_timestamp) (void *desc, u32 ats); 410 u64(*get_timestamp) (void *desc, u32 ats);
411 /* get rx timestamp status */ 411 /* get rx timestamp status */
412 int (*get_rx_timestamp_status) (void *desc, u32 ats); 412 int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
413 /* Display ring */ 413 /* Display ring */
414 void (*display_ring)(void *head, unsigned int size, bool rx); 414 void (*display_ring)(void *head, unsigned int size, bool rx);
415 /* set MSS via context descriptor */ 415 /* set MSS via context descriptor */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 4b286e27c4ca..7e089bf906b4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -258,7 +258,8 @@ static int dwmac4_rx_check_timestamp(void *desc)
258 return ret; 258 return ret;
259} 259}
260 260
261static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats) 261static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc,
262 u32 ats)
262{ 263{
263 struct dma_desc *p = (struct dma_desc *)desc; 264 struct dma_desc *p = (struct dma_desc *)desc;
264 int ret = -EINVAL; 265 int ret = -EINVAL;
@@ -270,7 +271,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
270 271
271 /* Check if timestamp is OK from context descriptor */ 272 /* Check if timestamp is OK from context descriptor */
272 do { 273 do {
273 ret = dwmac4_rx_check_timestamp(desc); 274 ret = dwmac4_rx_check_timestamp(next_desc);
274 if (ret < 0) 275 if (ret < 0)
275 goto exit; 276 goto exit;
276 i++; 277 i++;
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 7546b3664113..2a828a312814 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -400,7 +400,8 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
400 return ns; 400 return ns;
401} 401}
402 402
403static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats) 403static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
404 u32 ats)
404{ 405{
405 if (ats) { 406 if (ats) {
406 struct dma_extended_desc *p = (struct dma_extended_desc *)desc; 407 struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index f817f8f36569..db4cee57bb24 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -265,7 +265,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
265 return ns; 265 return ns;
266} 266}
267 267
268static int ndesc_get_rx_timestamp_status(void *desc, u32 ats) 268static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
269{ 269{
270 struct dma_desc *p = (struct dma_desc *)desc; 270 struct dma_desc *p = (struct dma_desc *)desc;
271 271
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 721b61655261..08c19ebd5306 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -34,6 +34,7 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
34{ 34{
35 u32 value = readl(ioaddr + PTP_TCR); 35 u32 value = readl(ioaddr + PTP_TCR);
36 unsigned long data; 36 unsigned long data;
37 u32 reg_value;
37 38
38 /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second 39 /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second
39 * formula = (1/ptp_clock) * 1000000000 40 * formula = (1/ptp_clock) * 1000000000
@@ -50,10 +51,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
50 51
51 data &= PTP_SSIR_SSINC_MASK; 52 data &= PTP_SSIR_SSINC_MASK;
52 53
54 reg_value = data;
53 if (gmac4) 55 if (gmac4)
54 data = data << GMAC4_PTP_SSIR_SSINC_SHIFT; 56 reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT;
55 57
56 writel(data, ioaddr + PTP_SSIR); 58 writel(reg_value, ioaddr + PTP_SSIR);
57 59
58 return data; 60 return data;
59} 61}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c52a9963c19d..0323d672e1c5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -482,7 +482,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
482 desc = np; 482 desc = np;
483 483
484 /* Check if timestamp is available */ 484 /* Check if timestamp is available */
485 if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) { 485 if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) {
486 ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); 486 ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
487 netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns); 487 netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
488 shhwtstamp = skb_hwtstamps(skb); 488 shhwtstamp = skb_hwtstamps(skb);
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 7da814686ad9..a243fa7ae02f 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -107,6 +107,7 @@ int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type,
107 struct tc_cls_bpf_offload *cls_bpf = type_data; 107 struct tc_cls_bpf_offload *cls_bpf = type_data;
108 struct bpf_prog *prog = cls_bpf->prog; 108 struct bpf_prog *prog = cls_bpf->prog;
109 struct netdevsim *ns = cb_priv; 109 struct netdevsim *ns = cb_priv;
110 struct bpf_prog *oldprog;
110 111
111 if (type != TC_SETUP_CLSBPF || 112 if (type != TC_SETUP_CLSBPF ||
112 !tc_can_offload(ns->netdev) || 113 !tc_can_offload(ns->netdev) ||
@@ -114,25 +115,27 @@ int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type,
114 cls_bpf->common.chain_index) 115 cls_bpf->common.chain_index)
115 return -EOPNOTSUPP; 116 return -EOPNOTSUPP;
116 117
117 if (nsim_xdp_offload_active(ns))
118 return -EBUSY;
119
120 if (!ns->bpf_tc_accept) 118 if (!ns->bpf_tc_accept)
121 return -EOPNOTSUPP; 119 return -EOPNOTSUPP;
122 /* Note: progs without skip_sw will probably not be dev bound */ 120 /* Note: progs without skip_sw will probably not be dev bound */
123 if (prog && !prog->aux->offload && !ns->bpf_tc_non_bound_accept) 121 if (prog && !prog->aux->offload && !ns->bpf_tc_non_bound_accept)
124 return -EOPNOTSUPP; 122 return -EOPNOTSUPP;
125 123
126 switch (cls_bpf->command) { 124 if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
127 case TC_CLSBPF_REPLACE:
128 return nsim_bpf_offload(ns, prog, true);
129 case TC_CLSBPF_ADD:
130 return nsim_bpf_offload(ns, prog, false);
131 case TC_CLSBPF_DESTROY:
132 return nsim_bpf_offload(ns, NULL, true);
133 default:
134 return -EOPNOTSUPP; 125 return -EOPNOTSUPP;
126
127 oldprog = cls_bpf->oldprog;
128
129 /* Don't remove if oldprog doesn't match driver's state */
130 if (ns->bpf_offloaded != oldprog) {
131 oldprog = NULL;
132 if (!cls_bpf->prog)
133 return 0;
134 if (ns->bpf_offloaded)
135 return -EBUSY;
135 } 136 }
137
138 return nsim_bpf_offload(ns, cls_bpf->prog, oldprog);
136} 139}
137 140
138int nsim_bpf_disable_tc(struct netdevsim *ns) 141int nsim_bpf_disable_tc(struct netdevsim *ns)
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 2fc026dc170a..342325a89d5f 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -879,6 +879,8 @@ static int m88e1510_config_init(struct phy_device *phydev)
879 879
880 /* SGMII-to-Copper mode initialization */ 880 /* SGMII-to-Copper mode initialization */
881 if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { 881 if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
882 u32 pause;
883
882 /* Select page 18 */ 884 /* Select page 18 */
883 err = marvell_set_page(phydev, 18); 885 err = marvell_set_page(phydev, 18);
884 if (err < 0) 886 if (err < 0)
@@ -902,6 +904,16 @@ static int m88e1510_config_init(struct phy_device *phydev)
902 err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); 904 err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
903 if (err < 0) 905 if (err < 0)
904 return err; 906 return err;
907
908 /* There appears to be a bug in the 88e1512 when used in
 909			 * SGMII to copper mode, where the AN advertisement register
 910			 * clears the pause bits each time a negotiation occurs.
 911			 * This means we can never be truly sure what was advertised,
912 * so disable Pause support.
913 */
914 pause = SUPPORTED_Pause | SUPPORTED_Asym_Pause;
915 phydev->supported &= ~pause;
916 phydev->advertising &= ~pause;
905 } 917 }
906 918
907 return m88e1121_config_init(phydev); 919 return m88e1121_config_init(phydev);
@@ -2070,7 +2082,8 @@ static struct phy_driver marvell_drivers[] = {
2070 .flags = PHY_HAS_INTERRUPT, 2082 .flags = PHY_HAS_INTERRUPT,
2071 .probe = marvell_probe, 2083 .probe = marvell_probe,
2072 .config_init = &m88e1145_config_init, 2084 .config_init = &m88e1145_config_init,
2073 .config_aneg = &marvell_config_aneg, 2085 .config_aneg = &m88e1101_config_aneg,
2086 .read_status = &genphy_read_status,
2074 .ack_interrupt = &marvell_ack_interrupt, 2087 .ack_interrupt = &marvell_ack_interrupt,
2075 .config_intr = &marvell_config_intr, 2088 .config_intr = &marvell_config_intr,
2076 .resume = &genphy_resume, 2089 .resume = &genphy_resume,
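
The SGMII quirk above simply masks the pause capabilities out of both the supported and advertising bitmaps so the PHY can never negotiate flow control it will later forget. A minimal sketch of that masking follows; the bit positions are stand-ins for the real SUPPORTED_Pause/SUPPORTED_Asym_Pause values from <linux/ethtool.h>.

#include <stdio.h>

/* Illustrative bit positions only; the real values come from
 * <linux/ethtool.h> (SUPPORTED_Pause / SUPPORTED_Asym_Pause). */
#define PAUSE_BIT       (1u << 13)
#define ASYM_PAUSE_BIT  (1u << 14)

int main(void)
{
	unsigned int supported = 0xffffffffu;
	unsigned int advertising = 0xffffffffu;
	unsigned int pause = PAUSE_BIT | ASYM_PAUSE_BIT;

	/* Mirror of the m88e1510 SGMII quirk: never advertise pause,
	 * since the PHY clears those bits on every autonegotiation. */
	supported &= ~pause;
	advertising &= ~pause;

	printf("supported=%08x advertising=%08x\n", supported, advertising);
	return 0;
}
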
diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c
index bfd3090fb055..07c6048200c6 100644
--- a/drivers/net/phy/mdio-xgene.c
+++ b/drivers/net/phy/mdio-xgene.c
@@ -194,8 +194,11 @@ static int xgene_mdio_reset(struct xgene_mdio_pdata *pdata)
194 } 194 }
195 195
196 ret = xgene_enet_ecc_init(pdata); 196 ret = xgene_enet_ecc_init(pdata);
197 if (ret) 197 if (ret) {
198 if (pdata->dev->of_node)
199 clk_disable_unprepare(pdata->clk);
198 return ret; 200 return ret;
201 }
199 xgene_gmac_reset(pdata); 202 xgene_gmac_reset(pdata);
200 203
201 return 0; 204 return 0;
@@ -388,8 +391,10 @@ static int xgene_mdio_probe(struct platform_device *pdev)
388 return ret; 391 return ret;
389 392
390 mdio_bus = mdiobus_alloc(); 393 mdio_bus = mdiobus_alloc();
391 if (!mdio_bus) 394 if (!mdio_bus) {
392 return -ENOMEM; 395 ret = -ENOMEM;
396 goto out_clk;
397 }
393 398
394 mdio_bus->name = "APM X-Gene MDIO bus"; 399 mdio_bus->name = "APM X-Gene MDIO bus";
395 400
@@ -418,7 +423,7 @@ static int xgene_mdio_probe(struct platform_device *pdev)
418 mdio_bus->phy_mask = ~0; 423 mdio_bus->phy_mask = ~0;
419 ret = mdiobus_register(mdio_bus); 424 ret = mdiobus_register(mdio_bus);
420 if (ret) 425 if (ret)
421 goto out; 426 goto out_mdiobus;
422 427
423 acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1, 428 acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1,
424 acpi_register_phy, NULL, mdio_bus, NULL); 429 acpi_register_phy, NULL, mdio_bus, NULL);
@@ -426,16 +431,20 @@ static int xgene_mdio_probe(struct platform_device *pdev)
426 } 431 }
427 432
428 if (ret) 433 if (ret)
429 goto out; 434 goto out_mdiobus;
430 435
431 pdata->mdio_bus = mdio_bus; 436 pdata->mdio_bus = mdio_bus;
432 xgene_mdio_status = true; 437 xgene_mdio_status = true;
433 438
434 return 0; 439 return 0;
435 440
436out: 441out_mdiobus:
437 mdiobus_free(mdio_bus); 442 mdiobus_free(mdio_bus);
438 443
444out_clk:
445 if (dev->of_node)
446 clk_disable_unprepare(pdata->clk);
447
439 return ret; 448 return ret;
440} 449}
441 450
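
The xgene_mdio_probe()/xgene_mdio_reset() fixes above are a standard goto-unwind cleanup: every failure path releases exactly what was acquired before it, in reverse order. A generic, runnable sketch of the pattern, with placeholder resource helpers rather than the driver's clock and MDIO calls:

#include <stdio.h>
#include <stdlib.h>

/* Placeholder resources standing in for the clock and the MDIO bus. */
static int get_clock(void)         { puts("clock on");  return 0; }
static void put_clock(void)        { puts("clock off"); }
static void *alloc_bus(void)       { return malloc(1); }
static void free_bus(void *bus)    { free(bus); puts("bus freed"); }
static int register_bus(void *bus) { (void)bus; return -1; /* force failure */ }

/*
 * Goto-based unwinding as used in xgene_mdio_probe(): each failure jumps
 * to the label that undoes everything acquired so far, in reverse order.
 */
static int probe(void)
{
	void *bus;
	int ret;

	ret = get_clock();
	if (ret)
		return ret;

	bus = alloc_bus();
	if (!bus) {
		ret = -1;
		goto out_clk;
	}

	ret = register_bus(bus);
	if (ret)
		goto out_bus;

	return 0;

out_bus:
	free_bus(bus);
out_clk:
	put_clock();
	return ret;
}

int main(void)
{
	printf("probe() = %d\n", probe());
	return 0;
}
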
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 48a0dc238f73..82090ae7ced1 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2155,6 +2155,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
2155 } 2155 }
2156 2156
2157 ndst = &rt->dst; 2157 ndst = &rt->dst;
2158 if (skb_dst(skb)) {
2159 int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
2160
2161 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
2162 skb, mtu);
2163 }
2164
2158 tos = ip_tunnel_ecn_encap(tos, old_iph, skb); 2165 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
2159 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); 2166 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
2160 err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr), 2167 err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
@@ -2190,6 +2197,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
2190 goto out_unlock; 2197 goto out_unlock;
2191 } 2198 }
2192 2199
2200 if (skb_dst(skb)) {
2201 int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
2202
2203 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
2204 skb, mtu);
2205 }
2206
2193 tos = ip_tunnel_ecn_encap(tos, old_iph, skb); 2207 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
2194 ttl = ttl ? : ip6_dst_hoplimit(ndst); 2208 ttl = ttl ? : ip6_dst_hoplimit(ndst);
2195 skb_scrub_packet(skb, xnet); 2209 skb_scrub_packet(skb, xnet);
@@ -3103,6 +3117,11 @@ static void vxlan_config_apply(struct net_device *dev,
3103 3117
3104 max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : 3118 max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
3105 VXLAN_HEADROOM); 3119 VXLAN_HEADROOM);
3120 if (max_mtu < ETH_MIN_MTU)
3121 max_mtu = ETH_MIN_MTU;
3122
3123 if (!changelink && !conf->mtu)
3124 dev->mtu = max_mtu;
3106 } 3125 }
3107 3126
3108 if (dev->mtu > max_mtu) 3127 if (dev->mtu > max_mtu)
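
Both transmit paths now report a reduced path MTU (route MTU minus the encapsulation headroom) back through update_pmtu(), and vxlan_config_apply() clamps the derived maximum to ETH_MIN_MTU so a small lower device cannot push the VXLAN device below the Ethernet minimum. A small arithmetic sketch, with the headroom and minimum-MTU constants copied by hand and therefore worth re-checking against the kernel headers:

#include <stdio.h>

/* Hand-copied values (IPv4: 20 + UDP 8 + VXLAN 8 + Ethernet 14,
 * IPv6: 40 + 8 + 8 + 14); verify against the headers before relying on them. */
#define VXLAN_HEADROOM   50
#define VXLAN6_HEADROOM  70
#define ETH_MIN_MTU      68

static int vxlan_max_mtu(int lowerdev_mtu, int use_ipv6)
{
	int max_mtu = lowerdev_mtu - (use_ipv6 ? VXLAN6_HEADROOM
					       : VXLAN_HEADROOM);

	/* Same clamp the patch adds to vxlan_config_apply() */
	if (max_mtu < ETH_MIN_MTU)
		max_mtu = ETH_MIN_MTU;
	return max_mtu;
}

int main(void)
{
	printf("lower 1500, v4 tunnel -> %d\n", vxlan_max_mtu(1500, 0));
	printf("lower 100,  v6 tunnel -> %d\n", vxlan_max_mtu(100, 1));
	return 0;
}
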
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 10b075a46b26..e8189c07b41f 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -684,6 +684,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
684 hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN); 684 hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN);
685 hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | 685 hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
686 IEEE80211_STYPE_NULLFUNC | 686 IEEE80211_STYPE_NULLFUNC |
687 IEEE80211_FCTL_TODS |
687 (ps ? IEEE80211_FCTL_PM : 0)); 688 (ps ? IEEE80211_FCTL_PM : 0));
688 hdr->duration_id = cpu_to_le16(0); 689 hdr->duration_id = cpu_to_le16(0);
689 memcpy(hdr->addr1, vp->bssid, ETH_ALEN); 690 memcpy(hdr->addr1, vp->bssid, ETH_ALEN);
@@ -3215,7 +3216,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info)
3215 if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info))) 3216 if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info)))
3216 continue; 3217 continue;
3217 3218
3218 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 3219 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
3219 if (!skb) { 3220 if (!skb) {
3220 res = -ENOMEM; 3221 res = -ENOMEM;
3221 goto out_err; 3222 goto out_err;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f837d666cbd4..1e46e60b8f10 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1287,7 +1287,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
1287 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < 1287 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
1288 NVME_DSM_MAX_RANGES); 1288 NVME_DSM_MAX_RANGES);
1289 1289
1290 queue->limits.discard_alignment = size; 1290 queue->limits.discard_alignment = 0;
1291 queue->limits.discard_granularity = size; 1291 queue->limits.discard_granularity = size;
1292 1292
1293 blk_queue_max_discard_sectors(queue, UINT_MAX); 1293 blk_queue_max_discard_sectors(queue, UINT_MAX);
@@ -1705,7 +1705,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
1705 blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); 1705 blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
1706 blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); 1706 blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
1707 } 1707 }
1708 if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) 1708 if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
1709 is_power_of_2(ctrl->max_hw_sectors))
1709 blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); 1710 blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
1710 blk_queue_virt_boundary(q, ctrl->page_size - 1); 1711 blk_queue_virt_boundary(q, ctrl->page_size - 1);
1711 if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) 1712 if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
@@ -2869,7 +2870,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2869 2870
2870 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); 2871 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
2871 nvme_set_queue_limits(ctrl, ns->queue); 2872 nvme_set_queue_limits(ctrl, ns->queue);
2872 nvme_setup_streams_ns(ctrl, ns);
2873 2873
2874 id = nvme_identify_ns(ctrl, nsid); 2874 id = nvme_identify_ns(ctrl, nsid);
2875 if (!id) 2875 if (!id)
@@ -2880,6 +2880,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2880 2880
2881 if (nvme_init_ns_head(ns, nsid, id, &new)) 2881 if (nvme_init_ns_head(ns, nsid, id, &new))
2882 goto out_free_id; 2882 goto out_free_id;
2883 nvme_setup_streams_ns(ctrl, ns);
2883 2884
2884#ifdef CONFIG_NVME_MULTIPATH 2885#ifdef CONFIG_NVME_MULTIPATH
2885 /* 2886 /*
@@ -2965,8 +2966,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
2965 return; 2966 return;
2966 2967
2967 if (ns->disk && ns->disk->flags & GENHD_FL_UP) { 2968 if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
2968 if (blk_get_integrity(ns->disk))
2969 blk_integrity_unregister(ns->disk);
2970 nvme_mpath_remove_disk_links(ns); 2969 nvme_mpath_remove_disk_links(ns);
2971 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, 2970 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
2972 &nvme_ns_id_attr_group); 2971 &nvme_ns_id_attr_group);
@@ -2974,6 +2973,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
2974 nvme_nvm_unregister_sysfs(ns); 2973 nvme_nvm_unregister_sysfs(ns);
2975 del_gendisk(ns->disk); 2974 del_gendisk(ns->disk);
2976 blk_cleanup_queue(ns->queue); 2975 blk_cleanup_queue(ns->queue);
2976 if (blk_get_integrity(ns->disk))
2977 blk_integrity_unregister(ns->disk);
2977 } 2978 }
2978 2979
2979 mutex_lock(&ns->ctrl->subsys->lock); 2980 mutex_lock(&ns->ctrl->subsys->lock);
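
blk_queue_chunk_sectors() assumes a power-of-two chunk size, so the stripe-size quirk above is now applied only when max_hw_sectors passes that test. The check itself is the usual n & (n - 1) trick, sketched here outside the kernel:

#include <stdio.h>

/* Same test the kernel's is_power_of_2() performs */
static int is_pow2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
	unsigned long sectors[] = { 256, 1024, 1280 };

	for (unsigned int i = 0; i < 3; i++)
		printf("max_hw_sectors=%lu -> %s chunk_sectors\n", sectors[i],
		       is_pow2(sectors[i]) ? "set" : "skip");
	return 0;
}
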
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0a8af4daef89..794e66e4aa20 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3221,7 +3221,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
3221 3221
3222 /* initiate nvme ctrl ref counting teardown */ 3222 /* initiate nvme ctrl ref counting teardown */
3223 nvme_uninit_ctrl(&ctrl->ctrl); 3223 nvme_uninit_ctrl(&ctrl->ctrl);
3224 nvme_put_ctrl(&ctrl->ctrl);
3225 3224
3226 /* Remove core ctrl ref. */ 3225 /* Remove core ctrl ref. */
3227 nvme_put_ctrl(&ctrl->ctrl); 3226 nvme_put_ctrl(&ctrl->ctrl);
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
index a25fed52f7e9..41b740aed3a3 100644
--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -1692,3 +1692,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask)
1692 iounmap(base_addr); 1692 iounmap(base_addr);
1693} 1693}
1694 1694
1695
1696/*
1697 * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
1698 * seems rushed, so that many built-in components simply don't work.
1699 * The following quirks disable the serial AUX port and the built-in ATI RV100
1700 * Radeon 7000 graphics card which both don't have any external connectors and
1701 * thus are useless, and even worse, e.g. the AUX port occupies ttyS0 and as
1702 * such makes those machines the only PARISC machines on which we can't use
1703 * ttyS0 as boot console.
1704 */
1705static void quirk_diva_ati_card(struct pci_dev *dev)
1706{
1707 if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
1708 dev->subsystem_device != 0x1292)
1709 return;
1710
1711 dev_info(&dev->dev, "Hiding Diva built-in ATI card");
1712 dev->device = 0;
1713}
1714DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
1715 quirk_diva_ati_card);
1716
1717static void quirk_diva_aux_disable(struct pci_dev *dev)
1718{
1719 if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
1720 dev->subsystem_device != 0x1291)
1721 return;
1722
1723 dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
1724 dev->device = 0;
1725}
1726DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
1727 quirk_diva_aux_disable);
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 945099d49f8f..14fd865a5120 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1012,7 +1012,12 @@ static int pci_pm_thaw_noirq(struct device *dev)
1012 if (pci_has_legacy_pm_support(pci_dev)) 1012 if (pci_has_legacy_pm_support(pci_dev))
1013 return pci_legacy_resume_early(dev); 1013 return pci_legacy_resume_early(dev);
1014 1014
1015 pci_update_current_state(pci_dev, PCI_D0); 1015 /*
1016 * pci_restore_state() requires the device to be in D0 (because of MSI
1017 * restoration among other things), so force it into D0 in case the
1018 * driver's "freeze" callbacks put it into a low-power state directly.
1019 */
1020 pci_set_power_state(pci_dev, PCI_D0);
1016 pci_restore_state(pci_dev); 1021 pci_restore_state(pci_dev);
1017 1022
1018 if (drv && drv->pm && drv->pm->thaw_noirq) 1023 if (drv && drv->pm && drv->pm->thaw_noirq)
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index a007f6249166..bdc28330800e 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5383,6 +5383,13 @@ out:
5383} 5383}
5384EXPORT_SYMBOL_GPL(qeth_poll); 5384EXPORT_SYMBOL_GPL(qeth_poll);
5385 5385
5386static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd)
5387{
5388 if (!cmd->hdr.return_code)
5389 cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
5390 return cmd->hdr.return_code;
5391}
5392
5386int qeth_setassparms_cb(struct qeth_card *card, 5393int qeth_setassparms_cb(struct qeth_card *card,
5387 struct qeth_reply *reply, unsigned long data) 5394 struct qeth_reply *reply, unsigned long data)
5388{ 5395{
@@ -6239,7 +6246,7 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card,
6239 (struct qeth_checksum_cmd *)reply->param; 6246 (struct qeth_checksum_cmd *)reply->param;
6240 6247
6241 QETH_CARD_TEXT(card, 4, "chkdoccb"); 6248 QETH_CARD_TEXT(card, 4, "chkdoccb");
6242 if (cmd->hdr.return_code) 6249 if (qeth_setassparms_inspect_rc(cmd))
6243 return 0; 6250 return 0;
6244 6251
6245 memset(chksum_cb, 0, sizeof(*chksum_cb)); 6252 memset(chksum_cb, 0, sizeof(*chksum_cb));
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 6e3d81969a77..d52265416da2 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1725,6 +1725,7 @@ struct aac_dev
1725#define FIB_CONTEXT_FLAG_NATIVE_HBA (0x00000010) 1725#define FIB_CONTEXT_FLAG_NATIVE_HBA (0x00000010)
1726#define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF (0x00000020) 1726#define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF (0x00000020)
1727#define FIB_CONTEXT_FLAG_SCSI_CMD (0x00000040) 1727#define FIB_CONTEXT_FLAG_SCSI_CMD (0x00000040)
1728#define FIB_CONTEXT_FLAG_EH_RESET (0x00000080)
1728 1729
1729/* 1730/*
1730 * Define the command values 1731 * Define the command values
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index bdf127aaab41..d55332de08f9 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -1037,7 +1037,7 @@ static int aac_eh_bus_reset(struct scsi_cmnd* cmd)
1037 info = &aac->hba_map[bus][cid]; 1037 info = &aac->hba_map[bus][cid];
1038 if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS || 1038 if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
1039 info->devtype != AAC_DEVTYPE_NATIVE_RAW) { 1039 info->devtype != AAC_DEVTYPE_NATIVE_RAW) {
1040 fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT; 1040 fib->flags |= FIB_CONTEXT_FLAG_EH_RESET;
1041 cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER; 1041 cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
1042 } 1042 }
1043 } 1043 }
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index a4f28b7e4c65..e18877177f1b 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
1576 return req; 1576 return req;
1577 1577
1578 for_each_bio(bio) { 1578 for_each_bio(bio) {
1579 ret = blk_rq_append_bio(req, bio); 1579 struct bio *bounce_bio = bio;
1580
1581 ret = blk_rq_append_bio(req, &bounce_bio);
1580 if (ret) 1582 if (ret)
1581 return ERR_PTR(ret); 1583 return ERR_PTR(ret);
1582 } 1584 }
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index 449ef5adbb2b..dfb8da83fa50 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -374,10 +374,8 @@ int scsi_dev_info_list_add_keyed(int compatible, char *vendor, char *model,
374 model, compatible); 374 model, compatible);
375 375
376 if (strflags) 376 if (strflags)
377 devinfo->flags = simple_strtoul(strflags, NULL, 0); 377 flags = (__force blist_flags_t)simple_strtoul(strflags, NULL, 0);
378 else 378 devinfo->flags = flags;
379 devinfo->flags = flags;
380
381 devinfo->compatible = compatible; 379 devinfo->compatible = compatible;
382 380
383 if (compatible) 381 if (compatible)
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index be5e919db0e8..0880d975eed3 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -770,7 +770,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
770 * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized 770 * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
771 **/ 771 **/
772static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, 772static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
773 int *bflags, int async) 773 blist_flags_t *bflags, int async)
774{ 774{
775 int ret; 775 int ret;
776 776
@@ -1049,14 +1049,15 @@ static unsigned char *scsi_inq_str(unsigned char *buf, unsigned char *inq,
1049 * - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized 1049 * - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
1050 **/ 1050 **/
1051static int scsi_probe_and_add_lun(struct scsi_target *starget, 1051static int scsi_probe_and_add_lun(struct scsi_target *starget,
1052 u64 lun, int *bflagsp, 1052 u64 lun, blist_flags_t *bflagsp,
1053 struct scsi_device **sdevp, 1053 struct scsi_device **sdevp,
1054 enum scsi_scan_mode rescan, 1054 enum scsi_scan_mode rescan,
1055 void *hostdata) 1055 void *hostdata)
1056{ 1056{
1057 struct scsi_device *sdev; 1057 struct scsi_device *sdev;
1058 unsigned char *result; 1058 unsigned char *result;
1059 int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256; 1059 blist_flags_t bflags;
1060 int res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
1060 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); 1061 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
1061 1062
1062 /* 1063 /*
@@ -1201,7 +1202,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
1201 * Modifies sdevscan->lun. 1202 * Modifies sdevscan->lun.
1202 **/ 1203 **/
1203static void scsi_sequential_lun_scan(struct scsi_target *starget, 1204static void scsi_sequential_lun_scan(struct scsi_target *starget,
1204 int bflags, int scsi_level, 1205 blist_flags_t bflags, int scsi_level,
1205 enum scsi_scan_mode rescan) 1206 enum scsi_scan_mode rescan)
1206{ 1207{
1207 uint max_dev_lun; 1208 uint max_dev_lun;
@@ -1292,7 +1293,7 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget,
1292 * 0: scan completed (or no memory, so further scanning is futile) 1293 * 0: scan completed (or no memory, so further scanning is futile)
1293 * 1: could not scan with REPORT LUN 1294 * 1: could not scan with REPORT LUN
1294 **/ 1295 **/
1295static int scsi_report_lun_scan(struct scsi_target *starget, int bflags, 1296static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflags,
1296 enum scsi_scan_mode rescan) 1297 enum scsi_scan_mode rescan)
1297{ 1298{
1298 unsigned char scsi_cmd[MAX_COMMAND_SIZE]; 1299 unsigned char scsi_cmd[MAX_COMMAND_SIZE];
@@ -1538,7 +1539,7 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
1538 unsigned int id, u64 lun, enum scsi_scan_mode rescan) 1539 unsigned int id, u64 lun, enum scsi_scan_mode rescan)
1539{ 1540{
1540 struct Scsi_Host *shost = dev_to_shost(parent); 1541 struct Scsi_Host *shost = dev_to_shost(parent);
1541 int bflags = 0; 1542 blist_flags_t bflags = 0;
1542 int res; 1543 int res;
1543 struct scsi_target *starget; 1544 struct scsi_target *starget;
1544 1545
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 50e7d7e4a861..a9996c16f4ae 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -967,7 +967,8 @@ sdev_show_wwid(struct device *dev, struct device_attribute *attr,
967} 967}
968static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL); 968static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL);
969 969
970#define BLIST_FLAG_NAME(name) [ilog2(BLIST_##name)] = #name 970#define BLIST_FLAG_NAME(name) \
971 [ilog2((__force unsigned int)BLIST_##name)] = #name
971static const char *const sdev_bflags_name[] = { 972static const char *const sdev_bflags_name[] = {
972#include "scsi_devinfo_tbl.c" 973#include "scsi_devinfo_tbl.c"
973}; 974};
@@ -984,7 +985,7 @@ sdev_show_blacklist(struct device *dev, struct device_attribute *attr,
984 for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) { 985 for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) {
985 const char *name = NULL; 986 const char *name = NULL;
986 987
987 if (!(sdev->sdev_bflags & BIT(i))) 988 if (!(sdev->sdev_bflags & (__force blist_flags_t)BIT(i)))
988 continue; 989 continue;
989 if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i]) 990 if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i])
990 name = sdev_bflags_name[i]; 991 name = sdev_bflags_name[i];
diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index d0219e36080c..10ebb213ddb3 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -50,14 +50,14 @@
50 50
51/* Our blacklist flags */ 51/* Our blacklist flags */
52enum { 52enum {
53 SPI_BLIST_NOIUS = 0x1, 53 SPI_BLIST_NOIUS = (__force blist_flags_t)0x1,
54}; 54};
55 55
56/* blacklist table, modelled on scsi_devinfo.c */ 56/* blacklist table, modelled on scsi_devinfo.c */
57static struct { 57static struct {
58 char *vendor; 58 char *vendor;
59 char *model; 59 char *model;
60 unsigned flags; 60 blist_flags_t flags;
61} spi_static_device_list[] __initdata = { 61} spi_static_device_list[] __initdata = {
62 {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS }, 62 {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS },
63 {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS }, 63 {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS },
@@ -221,9 +221,11 @@ static int spi_device_configure(struct transport_container *tc,
221{ 221{
222 struct scsi_device *sdev = to_scsi_device(dev); 222 struct scsi_device *sdev = to_scsi_device(dev);
223 struct scsi_target *starget = sdev->sdev_target; 223 struct scsi_target *starget = sdev->sdev_target;
224 unsigned bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8], 224 blist_flags_t bflags;
225 &sdev->inquiry[16], 225
226 SCSI_DEVINFO_SPI); 226 bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
227 &sdev->inquiry[16],
228 SCSI_DEVINFO_SPI);
227 229
228 /* Populate the target capability fields with the values 230 /* Populate the target capability fields with the values
229 * gleaned from the device inquiry */ 231 * gleaned from the device inquiry */
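
The SCSI hunks above convert plain integer blacklist flags to a __bitwise blist_flags_t, so sparse flags any code that mixes the flag type with ordinary integers; that is why __force casts appear at every conversion point. A compilable sketch of the pattern with a hypothetical demo_flags_t; outside a sparse run the annotations expand to nothing, as approximated here:

#include <stdio.h>

/* Under sparse, __bitwise/__force enforce the distinct type; in a plain
 * C build they expand to nothing, as approximated here. */
#define __bitwise
#define __force

typedef unsigned int __bitwise demo_flags_t;

#define DEMO_FLAG_NOLUN   ((__force demo_flags_t)(1 << 0))
#define DEMO_FLAG_SPARSE  ((__force demo_flags_t)(1 << 1))

int main(void)
{
	demo_flags_t flags = DEMO_FLAG_NOLUN | DEMO_FLAG_SPARSE;

	/* Converting back to a plain integer also needs a __force cast,
	 * mirroring the simple_strtoul() change in scsi_devinfo.c. */
	unsigned int raw = (__force unsigned int)flags;

	printf("raw flags = 0x%x\n", raw);
	return 0;
}
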
diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c
index 77fe55ce790c..d65345312527 100644
--- a/drivers/spi/spi-armada-3700.c
+++ b/drivers/spi/spi-armada-3700.c
@@ -79,6 +79,7 @@
79#define A3700_SPI_BYTE_LEN BIT(5) 79#define A3700_SPI_BYTE_LEN BIT(5)
80#define A3700_SPI_CLK_PRESCALE BIT(0) 80#define A3700_SPI_CLK_PRESCALE BIT(0)
81#define A3700_SPI_CLK_PRESCALE_MASK (0x1f) 81#define A3700_SPI_CLK_PRESCALE_MASK (0x1f)
82#define A3700_SPI_CLK_EVEN_OFFS (0x10)
82 83
83#define A3700_SPI_WFIFO_THRS_BIT 28 84#define A3700_SPI_WFIFO_THRS_BIT 28
84#define A3700_SPI_RFIFO_THRS_BIT 24 85#define A3700_SPI_RFIFO_THRS_BIT 24
@@ -220,6 +221,13 @@ static void a3700_spi_clock_set(struct a3700_spi *a3700_spi,
220 221
221 prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz); 222 prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz);
222 223
224 /* For prescaler values over 15, we can only set it by steps of 2.
225 * Starting from A3700_SPI_CLK_EVEN_OFFS, we set values from 0 up to
226 * 30. We only use this range from 16 to 30.
227 */
228 if (prescale > 15)
229 prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
230
223 val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG); 231 val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
224 val = val & ~A3700_SPI_CLK_PRESCALE_MASK; 232 val = val & ~A3700_SPI_CLK_PRESCALE_MASK;
225 233
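
Above a divisor of 15 the Armada 3700 prescaler field can only express even values, encoded from A3700_SPI_CLK_EVEN_OFFS upward, which is what the added rounding does. A quick userspace sketch of that encoding; the register layout is taken from the hunk above rather than the datasheet:

#include <stdio.h>

#define CLK_EVEN_OFFS 0x10   /* A3700_SPI_CLK_EVEN_OFFS from the hunk above */

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Encode the requested divisor the way a3700_spi_clock_set() now does */
static unsigned int encode_prescale(unsigned int clk_hz, unsigned int speed_hz)
{
	unsigned int prescale = DIV_ROUND_UP(clk_hz, speed_hz);

	/* Values above 15 are only representable in steps of two,
	 * starting at the CLK_EVEN_OFFS encoding. */
	if (prescale > 15)
		prescale = CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
	return prescale;
}

int main(void)
{
	/* 200 MHz source clock, a few target speeds */
	printf("20 MHz -> 0x%02x\n", encode_prescale(200000000, 20000000));
	printf(" 5 MHz -> 0x%02x\n", encode_prescale(200000000, 5000000));
	return 0;
}
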
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index f95da364c283..669470971023 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -1661,12 +1661,12 @@ static int atmel_spi_remove(struct platform_device *pdev)
1661 pm_runtime_get_sync(&pdev->dev); 1661 pm_runtime_get_sync(&pdev->dev);
1662 1662
1663 /* reset the hardware and block queue progress */ 1663 /* reset the hardware and block queue progress */
1664 spin_lock_irq(&as->lock);
1665 if (as->use_dma) { 1664 if (as->use_dma) {
1666 atmel_spi_stop_dma(master); 1665 atmel_spi_stop_dma(master);
1667 atmel_spi_release_dma(master); 1666 atmel_spi_release_dma(master);
1668 } 1667 }
1669 1668
1669 spin_lock_irq(&as->lock);
1670 spi_writel(as, CR, SPI_BIT(SWRST)); 1670 spi_writel(as, CR, SPI_BIT(SWRST));
1671 spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */ 1671 spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
1672 spi_readl(as, SR); 1672 spi_readl(as, SR);
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 2ce875764ca6..0835a8d88fb8 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -377,8 +377,8 @@ static int qspi_set_config_register(struct rspi_data *rspi, int access_size)
377 /* Sets SPCMD */ 377 /* Sets SPCMD */
378 rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0); 378 rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0);
379 379
380 /* Enables SPI function in master mode */ 380 /* Sets RSPI mode */
381 rspi_write8(rspi, SPCR_SPE | SPCR_MSTR, RSPI_SPCR); 381 rspi_write8(rspi, SPCR_MSTR, RSPI_SPCR);
382 382
383 return 0; 383 return 0;
384} 384}
diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index c5cd635c28f3..41410031f8e9 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -525,7 +525,7 @@ err_free_master:
525 525
526static int sun4i_spi_remove(struct platform_device *pdev) 526static int sun4i_spi_remove(struct platform_device *pdev)
527{ 527{
528 pm_runtime_disable(&pdev->dev); 528 pm_runtime_force_suspend(&pdev->dev);
529 529
530 return 0; 530 return 0;
531} 531}
diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c
index bc7100b93dfc..e0b9fe1d0e37 100644
--- a/drivers/spi/spi-xilinx.c
+++ b/drivers/spi/spi-xilinx.c
@@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
271 while (remaining_words) { 271 while (remaining_words) {
272 int n_words, tx_words, rx_words; 272 int n_words, tx_words, rx_words;
273 u32 sr; 273 u32 sr;
274 int stalled;
274 275
275 n_words = min(remaining_words, xspi->buffer_size); 276 n_words = min(remaining_words, xspi->buffer_size);
276 277
@@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
299 300
300 /* Read out all the data from the Rx FIFO */ 301 /* Read out all the data from the Rx FIFO */
301 rx_words = n_words; 302 rx_words = n_words;
303 stalled = 10;
302 while (rx_words) { 304 while (rx_words) {
305 if (rx_words == n_words && !(stalled--) &&
306 !(sr & XSPI_SR_TX_EMPTY_MASK) &&
307 (sr & XSPI_SR_RX_EMPTY_MASK)) {
308 dev_err(&spi->dev,
309 "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n");
310 xspi_init_hw(xspi);
311 return -EIO;
312 }
313
303 if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) { 314 if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) {
304 xilinx_spi_rx(xspi); 315 xilinx_spi_rx(xspi);
305 rx_words--; 316 rx_words--;
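
The Xilinx SPI fix gives the RX FIFO a bounded number of polls before declaring a stall and reinitializing the core instead of spinning forever. A minimal countdown sketch of that bail-out pattern, using a hypothetical poll helper in place of the driver's register reads:

#include <stdio.h>

/* Hypothetical poll: pretend the RX FIFO never fills. */
static int rx_fifo_empty(void) { return 1; }

/*
 * Bounded-poll pattern added to xilinx_spi_txrx_bufs(): if nothing has
 * arrived after a fixed number of checks, give up instead of spinning.
 */
static int drain_rx(int words)
{
	int stalled = 10;

	while (words) {
		if (rx_fifo_empty()) {
			if (!(stalled--)) {
				fprintf(stderr, "Detected stall\n");
				return -1;
			}
			continue;
		}
		words--;
	}
	return 0;
}

int main(void)
{
	printf("drain_rx(4) = %d\n", drain_rx(4));
	return 0;
}
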
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 7c69b4a9694d..0d99b242e82e 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
920 " %d i: %d bio: %p, allocating another" 920 " %d i: %d bio: %p, allocating another"
921 " bio\n", bio->bi_vcnt, i, bio); 921 " bio\n", bio->bi_vcnt, i, bio);
922 922
923 rc = blk_rq_append_bio(req, bio); 923 rc = blk_rq_append_bio(req, &bio);
924 if (rc) { 924 if (rc) {
925 pr_err("pSCSI: failed to append bio\n"); 925 pr_err("pSCSI: failed to append bio\n");
926 goto fail; 926 goto fail;
@@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
938 } 938 }
939 939
940 if (bio) { 940 if (bio) {
941 rc = blk_rq_append_bio(req, bio); 941 rc = blk_rq_append_bio(req, &bio);
942 if (rc) { 942 if (rc) {
943 pr_err("pSCSI: failed to append bio\n"); 943 pr_err("pSCSI: failed to append bio\n");
944 goto fail; 944 goto fail;
diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig
index f937082f3244..58e2fe40b2a0 100644
--- a/fs/cramfs/Kconfig
+++ b/fs/cramfs/Kconfig
@@ -34,6 +34,7 @@ config CRAMFS_BLOCKDEV
34config CRAMFS_MTD 34config CRAMFS_MTD
35 bool "Support CramFs image directly mapped in physical memory" 35 bool "Support CramFs image directly mapped in physical memory"
36 depends on CRAMFS && MTD 36 depends on CRAMFS && MTD
37 depends on CRAMFS=m || MTD=y
37 default y if !CRAMFS_BLOCKDEV 38 default y if !CRAMFS_BLOCKDEV
38 help 39 help
39 This option allows the CramFs driver to load data directly from 40 This option allows the CramFs driver to load data directly from
diff --git a/fs/exec.c b/fs/exec.c
index 156f56acfe8e..5688b5e1b937 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1339,15 +1339,10 @@ void setup_new_exec(struct linux_binprm * bprm)
1339 * avoid bad behavior from the prior rlimits. This has to 1339 * avoid bad behavior from the prior rlimits. This has to
1340 * happen before arch_pick_mmap_layout(), which examines 1340 * happen before arch_pick_mmap_layout(), which examines
1341 * RLIMIT_STACK, but after the point of no return to avoid 1341 * RLIMIT_STACK, but after the point of no return to avoid
1342 * races from other threads changing the limits. This also 1342 * needing to clean up the change on failure.
1343 * must be protected from races with prlimit() calls.
1344 */ 1343 */
1345 task_lock(current->group_leader);
1346 if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM) 1344 if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
1347 current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM; 1345 current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
1348 if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM)
1349 current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM;
1350 task_unlock(current->group_leader);
1351 } 1346 }
1352 1347
1353 arch_pick_mmap_layout(current->mm); 1348 arch_pick_mmap_layout(current->mm);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 07bca11749d4..c941251ac0c0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4722,6 +4722,7 @@ retry:
4722 EXT4_INODE_EOFBLOCKS); 4722 EXT4_INODE_EOFBLOCKS);
4723 } 4723 }
4724 ext4_mark_inode_dirty(handle, inode); 4724 ext4_mark_inode_dirty(handle, inode);
4725 ext4_update_inode_fsync_trans(handle, inode, 1);
4725 ret2 = ext4_journal_stop(handle); 4726 ret2 = ext4_journal_stop(handle);
4726 if (ret2) 4727 if (ret2)
4727 break; 4728 break;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b4267d72f249..b32cf263750d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -816,6 +816,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
816#ifdef CONFIG_EXT4_FS_POSIX_ACL 816#ifdef CONFIG_EXT4_FS_POSIX_ACL
817 struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT); 817 struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT);
818 818
819 if (IS_ERR(p))
820 return ERR_CAST(p);
819 if (p) { 821 if (p) {
820 int acl_size = p->a_count * sizeof(ext4_acl_entry); 822 int acl_size = p->a_count * sizeof(ext4_acl_entry);
821 823
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7df2c5644e59..534a9130f625 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -149,6 +149,15 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
149 */ 149 */
150int ext4_inode_is_fast_symlink(struct inode *inode) 150int ext4_inode_is_fast_symlink(struct inode *inode)
151{ 151{
152 if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
153 int ea_blocks = EXT4_I(inode)->i_file_acl ?
154 EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
155
156 if (ext4_has_inline_data(inode))
157 return 0;
158
159 return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
160 }
152 return S_ISLNK(inode->i_mode) && inode->i_size && 161 return S_ISLNK(inode->i_mode) && inode->i_size &&
153 (inode->i_size < EXT4_N_BLOCKS * 4); 162 (inode->i_size < EXT4_N_BLOCKS * 4);
154} 163}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 798b3ac680db..e750d68fbcb5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1399,6 +1399,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1399 "falling back\n")); 1399 "falling back\n"));
1400 } 1400 }
1401 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); 1401 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1402 if (!nblocks) {
1403 ret = NULL;
1404 goto cleanup_and_exit;
1405 }
1402 start = EXT4_I(dir)->i_dir_start_lookup; 1406 start = EXT4_I(dir)->i_dir_start_lookup;
1403 if (start >= nblocks) 1407 if (start >= nblocks)
1404 start = 0; 1408 start = 0;
diff --git a/fs/namespace.c b/fs/namespace.c
index e158ec6b527b..9d1374ab6e06 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2826,6 +2826,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
2826 SB_DIRSYNC | 2826 SB_DIRSYNC |
2827 SB_SILENT | 2827 SB_SILENT |
2828 SB_POSIXACL | 2828 SB_POSIXACL |
2829 SB_LAZYTIME |
2829 SB_I_VERSION); 2830 SB_I_VERSION);
2830 2831
2831 if (flags & MS_REMOUNT) 2832 if (flags & MS_REMOUNT)
diff --git a/fs/super.c b/fs/super.c
index d4e33e8f1e6f..7ff1349609e4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -191,6 +191,24 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
191 191
192 INIT_LIST_HEAD(&s->s_mounts); 192 INIT_LIST_HEAD(&s->s_mounts);
193 s->s_user_ns = get_user_ns(user_ns); 193 s->s_user_ns = get_user_ns(user_ns);
194 init_rwsem(&s->s_umount);
195 lockdep_set_class(&s->s_umount, &type->s_umount_key);
196 /*
197 * sget() can have s_umount recursion.
198 *
199 * When it cannot find a suitable sb, it allocates a new
200 * one (this one), and tries again to find a suitable old
201 * one.
202 *
203 * In case that succeeds, it will acquire the s_umount
 204	 * lock of the old one. Since these are clearly distinct
205 * locks, and this object isn't exposed yet, there's no
206 * risk of deadlocks.
207 *
208 * Annotate this by putting this lock in a different
209 * subclass.
210 */
211 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
194 212
195 if (security_sb_alloc(s)) 213 if (security_sb_alloc(s))
196 goto fail; 214 goto fail;
@@ -218,25 +236,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
218 goto fail; 236 goto fail;
219 if (list_lru_init_memcg(&s->s_inode_lru)) 237 if (list_lru_init_memcg(&s->s_inode_lru))
220 goto fail; 238 goto fail;
221
222 init_rwsem(&s->s_umount);
223 lockdep_set_class(&s->s_umount, &type->s_umount_key);
224 /*
225 * sget() can have s_umount recursion.
226 *
227 * When it cannot find a suitable sb, it allocates a new
228 * one (this one), and tries again to find a suitable old
229 * one.
230 *
231 * In case that succeeds, it will acquire the s_umount
232 * lock of the old one. Since these are clearly distrinct
233 * locks, and this object isn't exposed yet, there's no
234 * risk of deadlocks.
235 *
236 * Annotate this by putting this lock in a different
237 * subclass.
238 */
239 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
240 s->s_count = 1; 239 s->s_count = 1;
241 atomic_set(&s->s_active, 1); 240 atomic_set(&s->s_active, 1);
242 mutex_init(&s->s_vfs_rename_mutex); 241 mutex_init(&s->s_vfs_rename_mutex);
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 6e45608b2399..9da6ce22803f 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -62,7 +62,7 @@ struct arch_timer_cpu {
62 bool enabled; 62 bool enabled;
63}; 63};
64 64
65int kvm_timer_hyp_init(void); 65int kvm_timer_hyp_init(bool);
66int kvm_timer_enable(struct kvm_vcpu *vcpu); 66int kvm_timer_enable(struct kvm_vcpu *vcpu);
67int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); 67int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
68void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); 68void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 82f0c8fd7be8..23d29b39f71e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx);
492 492
493#define bio_set_dev(bio, bdev) \ 493#define bio_set_dev(bio, bdev) \
494do { \ 494do { \
495 if ((bio)->bi_disk != (bdev)->bd_disk) \
496 bio_clear_flag(bio, BIO_THROTTLED);\
495 (bio)->bi_disk = (bdev)->bd_disk; \ 497 (bio)->bi_disk = (bdev)->bd_disk; \
496 (bio)->bi_partno = (bdev)->bd_partno; \ 498 (bio)->bi_partno = (bdev)->bd_partno; \
497} while (0) 499} while (0)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a1e628e032da..9e7d8bd776d2 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -50,8 +50,6 @@ struct blk_issue_stat {
50struct bio { 50struct bio {
51 struct bio *bi_next; /* request queue link */ 51 struct bio *bi_next; /* request queue link */
52 struct gendisk *bi_disk; 52 struct gendisk *bi_disk;
53 u8 bi_partno;
54 blk_status_t bi_status;
55 unsigned int bi_opf; /* bottom bits req flags, 53 unsigned int bi_opf; /* bottom bits req flags,
56 * top bits REQ_OP. Use 54 * top bits REQ_OP. Use
57 * accessors. 55 * accessors.
@@ -59,8 +57,8 @@ struct bio {
59 unsigned short bi_flags; /* status, etc and bvec pool number */ 57 unsigned short bi_flags; /* status, etc and bvec pool number */
60 unsigned short bi_ioprio; 58 unsigned short bi_ioprio;
61 unsigned short bi_write_hint; 59 unsigned short bi_write_hint;
62 60 blk_status_t bi_status;
63 struct bvec_iter bi_iter; 61 u8 bi_partno;
64 62
65 /* Number of segments in this BIO after 63 /* Number of segments in this BIO after
66 * physical address coalescing is performed. 64 * physical address coalescing is performed.
@@ -74,8 +72,9 @@ struct bio {
74 unsigned int bi_seg_front_size; 72 unsigned int bi_seg_front_size;
75 unsigned int bi_seg_back_size; 73 unsigned int bi_seg_back_size;
76 74
77 atomic_t __bi_remaining; 75 struct bvec_iter bi_iter;
78 76
77 atomic_t __bi_remaining;
79 bio_end_io_t *bi_end_io; 78 bio_end_io_t *bi_end_io;
80 79
81 void *bi_private; 80 void *bi_private;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8089ca17db9a..0ce8a372d506 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t;
135struct request { 135struct request {
136 struct list_head queuelist; 136 struct list_head queuelist;
137 union { 137 union {
138 call_single_data_t csd; 138 struct __call_single_data csd;
139 u64 fifo_time; 139 u64 fifo_time;
140 }; 140 };
141 141
@@ -241,14 +241,24 @@ struct request {
241 struct request *next_rq; 241 struct request *next_rq;
242}; 242};
243 243
244static inline bool blk_op_is_scsi(unsigned int op)
245{
246 return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
247}
248
249static inline bool blk_op_is_private(unsigned int op)
250{
251 return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
252}
253
244static inline bool blk_rq_is_scsi(struct request *rq) 254static inline bool blk_rq_is_scsi(struct request *rq)
245{ 255{
246 return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT; 256 return blk_op_is_scsi(req_op(rq));
247} 257}
248 258
249static inline bool blk_rq_is_private(struct request *rq) 259static inline bool blk_rq_is_private(struct request *rq)
250{ 260{
251 return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT; 261 return blk_op_is_private(req_op(rq));
252} 262}
253 263
254static inline bool blk_rq_is_passthrough(struct request *rq) 264static inline bool blk_rq_is_passthrough(struct request *rq)
@@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
256 return blk_rq_is_scsi(rq) || blk_rq_is_private(rq); 266 return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
257} 267}
258 268
269static inline bool bio_is_passthrough(struct bio *bio)
270{
271 unsigned op = bio_op(bio);
272
273 return blk_op_is_scsi(op) || blk_op_is_private(op);
274}
275
259static inline unsigned short req_get_ioprio(struct request *req) 276static inline unsigned short req_get_ioprio(struct request *req)
260{ 277{
261 return req->ioprio; 278 return req->ioprio;
@@ -948,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
948extern void blk_rq_unprep_clone(struct request *rq); 965extern void blk_rq_unprep_clone(struct request *rq);
949extern blk_status_t blk_insert_cloned_request(struct request_queue *q, 966extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
950 struct request *rq); 967 struct request *rq);
951extern int blk_rq_append_bio(struct request *rq, struct bio *bio); 968extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
952extern void blk_delay_queue(struct request_queue *, unsigned long); 969extern void blk_delay_queue(struct request_queue *, unsigned long);
953extern void blk_queue_split(struct request_queue *, struct bio **); 970extern void blk_queue_split(struct request_queue *, struct bio **);
954extern void blk_recount_segments(struct request_queue *, struct bio *); 971extern void blk_recount_segments(struct request_queue *, struct bio *);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index aaac589e490c..c009e472f647 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -15,11 +15,11 @@
15 * In practice this is far bigger than any realistic pointer offset; this limit 15 * In practice this is far bigger than any realistic pointer offset; this limit
16 * ensures that umax_value + (int)off + (int)size cannot overflow a u64. 16 * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
17 */ 17 */
18#define BPF_MAX_VAR_OFF (1ULL << 31) 18#define BPF_MAX_VAR_OFF (1 << 29)
19/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures 19/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures
20 * that converting umax_value to int cannot overflow. 20 * that converting umax_value to int cannot overflow.
21 */ 21 */
22#define BPF_MAX_VAR_SIZ INT_MAX 22#define BPF_MAX_VAR_SIZ (1 << 29)
23 23
24/* Liveness marks, used for registers and spilled-regs (in stack slots). 24/* Liveness marks, used for registers and spilled-regs (in stack slots).
25 * Read marks propagate upwards until they find a write mark; they record that 25 * Read marks propagate upwards until they find a write mark; they record that
diff --git a/include/linux/pti.h b/include/linux/intel-pti.h
index b3ea01a3197e..2710d72de3c9 100644
--- a/include/linux/pti.h
+++ b/include/linux/intel-pti.h
@@ -22,8 +22,8 @@
22 * interface to write out it's contents for debugging a mobile system. 22 * interface to write out it's contents for debugging a mobile system.
23 */ 23 */
24 24
25#ifndef PTI_H_ 25#ifndef LINUX_INTEL_PTI_H_
26#define PTI_H_ 26#define LINUX_INTEL_PTI_H_
27 27
28/* offset for last dword of any PTI message. Part of MIPI P1149.7 */ 28/* offset for last dword of any PTI message. Part of MIPI P1149.7 */
29#define PTI_LASTDWORD_DTS 0x30 29#define PTI_LASTDWORD_DTS 0x30
@@ -40,4 +40,4 @@ struct pti_masterchannel *pti_request_masterchannel(u8 type,
40 const char *thread_name); 40 const char *thread_name);
41void pti_release_masterchannel(struct pti_masterchannel *mc); 41void pti_release_masterchannel(struct pti_masterchannel *mc);
42 42
43#endif /*PTI_H_*/ 43#endif /* LINUX_INTEL_PTI_H_ */
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index cb18c6290ca8..8415bf1a9776 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -273,7 +273,8 @@ struct ipv6_pinfo {
273 * 100: prefer care-of address 273 * 100: prefer care-of address
274 */ 274 */
275 dontfrag:1, 275 dontfrag:1,
276 autoflowlabel:1; 276 autoflowlabel:1,
277 autoflowlabel_set:1;
277 __u8 min_hopcount; 278 __u8 min_hopcount;
278 __u8 tclass; 279 __u8 tclass;
279 __be32 rcv_flowinfo; 280 __be32 rcv_flowinfo;
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index a2a1318a3d0c..c3d3f04d8cc6 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -915,10 +915,10 @@ enum PDEV_STAT {PDEV_STAT_IDLE, PDEV_STAT_RUN};
915#define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6) 915#define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6)
916 916
917enum dev_aspm_mode { 917enum dev_aspm_mode {
918 DEV_ASPM_DISABLE = 0,
919 DEV_ASPM_DYNAMIC, 918 DEV_ASPM_DYNAMIC,
920 DEV_ASPM_BACKDOOR, 919 DEV_ASPM_BACKDOOR,
921 DEV_ASPM_STATIC, 920 DEV_ASPM_STATIC,
921 DEV_ASPM_DISABLE,
922}; 922};
923 923
924/* 924/*
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a886b51511ab..57b109c6e422 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -556,6 +556,7 @@ struct mlx5_core_sriov {
556}; 556};
557 557
558struct mlx5_irq_info { 558struct mlx5_irq_info {
559 cpumask_var_t mask;
559 char name[MLX5_MAX_IRQ_NAME]; 560 char name[MLX5_MAX_IRQ_NAME];
560}; 561};
561 562
@@ -1048,7 +1049,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
1048 enum mlx5_eq_type type); 1049 enum mlx5_eq_type type);
1049int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); 1050int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
1050int mlx5_start_eqs(struct mlx5_core_dev *dev); 1051int mlx5_start_eqs(struct mlx5_core_dev *dev);
1051int mlx5_stop_eqs(struct mlx5_core_dev *dev); 1052void mlx5_stop_eqs(struct mlx5_core_dev *dev);
1052int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, 1053int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
1053 unsigned int *irqn); 1054 unsigned int *irqn);
1054int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); 1055int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 38a7577a9ce7..d44ec5f41d4a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -147,7 +147,7 @@ enum {
147 MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, 147 MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771,
148 MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, 148 MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772,
149 MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, 149 MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773,
150 MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, 150 MLX5_CMD_OP_SET_PP_RATE_LIMIT = 0x780,
151 MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, 151 MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781,
152 MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782, 152 MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782,
153 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783, 153 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783,
@@ -7239,7 +7239,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
7239 u8 vxlan_udp_port[0x10]; 7239 u8 vxlan_udp_port[0x10];
7240}; 7240};
7241 7241
7242struct mlx5_ifc_set_rate_limit_out_bits { 7242struct mlx5_ifc_set_pp_rate_limit_out_bits {
7243 u8 status[0x8]; 7243 u8 status[0x8];
7244 u8 reserved_at_8[0x18]; 7244 u8 reserved_at_8[0x18];
7245 7245
@@ -7248,7 +7248,7 @@ struct mlx5_ifc_set_rate_limit_out_bits {
7248 u8 reserved_at_40[0x40]; 7248 u8 reserved_at_40[0x40];
7249}; 7249};
7250 7250
7251struct mlx5_ifc_set_rate_limit_in_bits { 7251struct mlx5_ifc_set_pp_rate_limit_in_bits {
7252 u8 opcode[0x10]; 7252 u8 opcode[0x10];
7253 u8 reserved_at_10[0x10]; 7253 u8 reserved_at_10[0x10];
7254 7254
@@ -7261,6 +7261,8 @@ struct mlx5_ifc_set_rate_limit_in_bits {
7261 u8 reserved_at_60[0x20]; 7261 u8 reserved_at_60[0x20];
7262 7262
7263 u8 rate_limit[0x20]; 7263 u8 rate_limit[0x20];
7264
7265 u8 reserved_at_a0[0x160];
7264}; 7266};
7265 7267
7266struct mlx5_ifc_access_register_out_bits { 7268struct mlx5_ifc_access_register_out_bits {
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 7b2170bfd6e7..bc6bb325d1bf 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -126,7 +126,7 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
126 * for that name. This appears in the sysfs "modalias" attribute 126 * for that name. This appears in the sysfs "modalias" attribute
127 * for driver coldplugging, and in uevents used for hotplugging 127 * for driver coldplugging, and in uevents used for hotplugging
128 * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when 128 * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when
129 * when not using a GPIO line) 129 * not using a GPIO line)
130 * 130 *
131 * @statistics: statistics for the spi_device 131 * @statistics: statistics for the spi_device
132 * 132 *
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8b8118a7fadb..cb4d92b79cd9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3226,7 +3226,6 @@ struct cfg80211_ops {
3226 * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. 3226 * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN.
3227 * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing 3227 * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing
3228 * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH. 3228 * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH.
3229 * @WIPHY_FLAG_SUPPORTS_SCHED_SCAN: The device supports scheduled scans.
3230 * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the 3229 * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the
3231 * firmware. 3230 * firmware.
3232 * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP. 3231 * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP.
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 58bba9c769ea..31574c958673 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -696,9 +696,7 @@ struct tc_cls_matchall_offload {
696}; 696};
697 697
698enum tc_clsbpf_command { 698enum tc_clsbpf_command {
699 TC_CLSBPF_ADD, 699 TC_CLSBPF_OFFLOAD,
700 TC_CLSBPF_REPLACE,
701 TC_CLSBPF_DESTROY,
702 TC_CLSBPF_STATS, 700 TC_CLSBPF_STATS,
703}; 701};
704 702
@@ -707,6 +705,7 @@ struct tc_cls_bpf_offload {
707 enum tc_clsbpf_command command; 705 enum tc_clsbpf_command command;
708 struct tcf_exts *exts; 706 struct tcf_exts *exts;
709 struct bpf_prog *prog; 707 struct bpf_prog *prog;
708 struct bpf_prog *oldprog;
710 const char *name; 709 const char *name;
711 bool exts_integrated; 710 bool exts_integrated;
712 u32 gen_flags; 711 u32 gen_flags;
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index e4b0b8e09932..2c735a3e6613 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -211,7 +211,7 @@ TRACE_EVENT(kvm_ack_irq,
211 { KVM_TRACE_MMIO_WRITE, "write" } 211 { KVM_TRACE_MMIO_WRITE, "write" }
212 212
213TRACE_EVENT(kvm_mmio, 213TRACE_EVENT(kvm_mmio,
214 TP_PROTO(int type, int len, u64 gpa, u64 val), 214 TP_PROTO(int type, int len, u64 gpa, void *val),
215 TP_ARGS(type, len, gpa, val), 215 TP_ARGS(type, len, gpa, val),
216 216
217 TP_STRUCT__entry( 217 TP_STRUCT__entry(
@@ -225,7 +225,10 @@ TRACE_EVENT(kvm_mmio,
225 __entry->type = type; 225 __entry->type = type;
226 __entry->len = len; 226 __entry->len = len;
227 __entry->gpa = gpa; 227 __entry->gpa = gpa;
228 __entry->val = val; 228 __entry->val = 0;
229 if (val)
230 memcpy(&__entry->val, val,
231 min_t(u32, sizeof(__entry->val), len));
229 ), 232 ),
230 233
231 TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", 234 TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 48b2901cf483..1cd2c2d28fc3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1417,6 +1417,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
1417 break; 1417 break;
1418 case PTR_TO_STACK: 1418 case PTR_TO_STACK:
1419 pointer_desc = "stack "; 1419 pointer_desc = "stack ";
1420 /* The stack spill tracking logic in check_stack_write()
1421 * and check_stack_read() relies on stack accesses being
1422 * aligned.
1423 */
1424 strict = true;
1420 break; 1425 break;
1421 default: 1426 default:
1422 break; 1427 break;
@@ -1473,6 +1478,29 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
1473 return env->subprog_stack_depth[subprog]; 1478 return env->subprog_stack_depth[subprog];
1474} 1479}
1475 1480
1481/* truncate register to smaller size (in bytes)
1482 * must be called with size < BPF_REG_SIZE
1483 */
1484static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1485{
1486 u64 mask;
1487
1488 /* clear high bits in bit representation */
1489 reg->var_off = tnum_cast(reg->var_off, size);
1490
1491 /* fix arithmetic bounds */
1492 mask = ((u64)1 << (size * 8)) - 1;
1493 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1494 reg->umin_value &= mask;
1495 reg->umax_value &= mask;
1496 } else {
1497 reg->umin_value = 0;
1498 reg->umax_value = mask;
1499 }
1500 reg->smin_value = reg->umin_value;
1501 reg->smax_value = reg->umax_value;
1502}
1503
1476/* check whether memory at (regno + off) is accessible for t = (read | write) 1504/* check whether memory at (regno + off) is accessible for t = (read | write)
1477 * if t==write, value_regno is a register which value is stored into memory 1505 * if t==write, value_regno is a register which value is stored into memory
1478 * if t==read, value_regno is a register which will receive the value from memory 1506 * if t==read, value_regno is a register which will receive the value from memory
@@ -1608,9 +1636,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
1608 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && 1636 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
1609 regs[value_regno].type == SCALAR_VALUE) { 1637 regs[value_regno].type == SCALAR_VALUE) {
1610 /* b/h/w load zero-extends, mark upper bits as known 0 */ 1638 /* b/h/w load zero-extends, mark upper bits as known 0 */
1611 regs[value_regno].var_off = 1639 coerce_reg_to_size(&regs[value_regno], size);
1612 tnum_cast(regs[value_regno].var_off, size);
1613 __update_reg_bounds(&regs[value_regno]);
1614 } 1640 }
1615 return err; 1641 return err;
1616} 1642}
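
coerce_reg_to_size() above both truncates the tracked tnum and rescales the unsigned bounds to the loaded width, falling back to the full [0, mask] range when the discarded high bits of umin and umax disagree. The bounds half of that arithmetic, worked through for a one-byte load (tnum handling omitted):

#include <stdint.h>
#include <stdio.h>

/* Bounds-only version of the verifier's coerce_reg_to_size():
 * truncate [umin, umax] to `size` bytes, widening to the full range
 * when the discarded high bits differ.  Valid for size < 8. */
static void coerce_bounds(uint64_t *umin, uint64_t *umax, int size)
{
	uint64_t mask = ((uint64_t)1 << (size * 8)) - 1;

	if ((*umin & ~mask) == (*umax & ~mask)) {
		*umin &= mask;
		*umax &= mask;
	} else {
		*umin = 0;
		*umax = mask;
	}
}

int main(void)
{
	uint64_t umin = 0x1234, umax = 0x12ff;

	coerce_bounds(&umin, &umax, 1);          /* 1-byte load */
	printf("after: [0x%llx, 0x%llx]\n",
	       (unsigned long long)umin, (unsigned long long)umax);
	/* High bits matched (0x12xx), so the range becomes [0x34, 0xff] */
	return 0;
}
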
@@ -1684,6 +1710,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1684 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 1710 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1685 verbose(env, "invalid variable stack read R%d var_off=%s\n", 1711 verbose(env, "invalid variable stack read R%d var_off=%s\n",
1686 regno, tn_buf); 1712 regno, tn_buf);
1713 return -EACCES;
1687 } 1714 }
1688 off = reg->off + reg->var_off.value; 1715 off = reg->off + reg->var_off.value;
1689 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || 1716 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
@@ -2206,7 +2233,13 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
2206 return -EINVAL; 2233 return -EINVAL;
2207 } 2234 }
2208 2235
2236 /* With LD_ABS/IND some JITs save/restore skb from r1. */
2209 changes_data = bpf_helper_changes_pkt_data(fn->func); 2237 changes_data = bpf_helper_changes_pkt_data(fn->func);
2238 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
2239 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
2240 func_id_name(func_id), func_id);
2241 return -EINVAL;
2242 }
2210 2243
2211 memset(&meta, 0, sizeof(meta)); 2244 memset(&meta, 0, sizeof(meta));
2212 meta.pkt_access = fn->pkt_access; 2245 meta.pkt_access = fn->pkt_access;
@@ -2298,14 +2331,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
2298 return 0; 2331 return 0;
2299} 2332}
2300 2333
2301static void coerce_reg_to_32(struct bpf_reg_state *reg)
2302{
2303 /* clear high 32 bits */
2304 reg->var_off = tnum_cast(reg->var_off, 4);
2305 /* Update bounds */
2306 __update_reg_bounds(reg);
2307}
2308
2309static bool signed_add_overflows(s64 a, s64 b) 2334static bool signed_add_overflows(s64 a, s64 b)
2310{ 2335{
2311 /* Do the add in u64, where overflow is well-defined */ 2336 /* Do the add in u64, where overflow is well-defined */
@@ -2326,6 +2351,41 @@ static bool signed_sub_overflows(s64 a, s64 b)
2326 return res > a; 2351 return res > a;
2327} 2352}
2328 2353
2354static bool check_reg_sane_offset(struct bpf_verifier_env *env,
2355 const struct bpf_reg_state *reg,
2356 enum bpf_reg_type type)
2357{
2358 bool known = tnum_is_const(reg->var_off);
2359 s64 val = reg->var_off.value;
2360 s64 smin = reg->smin_value;
2361
2362 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
2363 verbose(env, "math between %s pointer and %lld is not allowed\n",
2364 reg_type_str[type], val);
2365 return false;
2366 }
2367
2368 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
2369 verbose(env, "%s pointer offset %d is not allowed\n",
2370 reg_type_str[type], reg->off);
2371 return false;
2372 }
2373
2374 if (smin == S64_MIN) {
2375 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
2376 reg_type_str[type]);
2377 return false;
2378 }
2379
2380 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
2381 verbose(env, "value %lld makes %s pointer be out of bounds\n",
2382 smin, reg_type_str[type]);
2383 return false;
2384 }
2385
2386 return true;
2387}
2388
2329/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. 2389/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
2330 * Caller should also handle BPF_MOV case separately. 2390 * Caller should also handle BPF_MOV case separately.
2331 * If we return -EACCES, caller may want to try again treating pointer as a 2391 * If we return -EACCES, caller may want to try again treating pointer as a
@@ -2364,29 +2424,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
2364 2424
2365 if (BPF_CLASS(insn->code) != BPF_ALU64) { 2425 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2366 /* 32-bit ALU ops on pointers produce (meaningless) scalars */ 2426 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
2367 if (!env->allow_ptr_leaks) 2427 verbose(env,
2368 verbose(env, 2428 "R%d 32-bit pointer arithmetic prohibited\n",
2369 "R%d 32-bit pointer arithmetic prohibited\n", 2429 dst);
2370 dst);
2371 return -EACCES; 2430 return -EACCES;
2372 } 2431 }
2373 2432
2374 if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { 2433 if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
2375 if (!env->allow_ptr_leaks) 2434 verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
2376 verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", 2435 dst);
2377 dst);
2378 return -EACCES; 2436 return -EACCES;
2379 } 2437 }
2380 if (ptr_reg->type == CONST_PTR_TO_MAP) { 2438 if (ptr_reg->type == CONST_PTR_TO_MAP) {
2381 if (!env->allow_ptr_leaks) 2439 verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
2382 verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", 2440 dst);
2383 dst);
2384 return -EACCES; 2441 return -EACCES;
2385 } 2442 }
2386 if (ptr_reg->type == PTR_TO_PACKET_END) { 2443 if (ptr_reg->type == PTR_TO_PACKET_END) {
2387 if (!env->allow_ptr_leaks) 2444 verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
2388 verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", 2445 dst);
2389 dst);
2390 return -EACCES; 2446 return -EACCES;
2391 } 2447 }
2392 2448
@@ -2396,6 +2452,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
2396 dst_reg->type = ptr_reg->type; 2452 dst_reg->type = ptr_reg->type;
2397 dst_reg->id = ptr_reg->id; 2453 dst_reg->id = ptr_reg->id;
2398 2454
2455 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
2456 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
2457 return -EINVAL;
2458
2399 switch (opcode) { 2459 switch (opcode) {
2400 case BPF_ADD: 2460 case BPF_ADD:
2401 /* We can take a fixed offset as long as it doesn't overflow 2461 /* We can take a fixed offset as long as it doesn't overflow
@@ -2449,9 +2509,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
2449 case BPF_SUB: 2509 case BPF_SUB:
2450 if (dst_reg == off_reg) { 2510 if (dst_reg == off_reg) {
2451 /* scalar -= pointer. Creates an unknown scalar */ 2511 /* scalar -= pointer. Creates an unknown scalar */
2452 if (!env->allow_ptr_leaks) 2512 verbose(env, "R%d tried to subtract pointer from scalar\n",
2453 verbose(env, "R%d tried to subtract pointer from scalar\n", 2513 dst);
2454 dst);
2455 return -EACCES; 2514 return -EACCES;
2456 } 2515 }
2457 /* We don't allow subtraction from FP, because (according to 2516 /* We don't allow subtraction from FP, because (according to
@@ -2459,9 +2518,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
2459 * be able to deal with it. 2518 * be able to deal with it.
2460 */ 2519 */
2461 if (ptr_reg->type == PTR_TO_STACK) { 2520 if (ptr_reg->type == PTR_TO_STACK) {
2462 if (!env->allow_ptr_leaks) 2521 verbose(env, "R%d subtraction from stack pointer prohibited\n",
2463 verbose(env, "R%d subtraction from stack pointer prohibited\n", 2522 dst);
2464 dst);
2465 return -EACCES; 2523 return -EACCES;
2466 } 2524 }
2467 if (known && (ptr_reg->off - smin_val == 2525 if (known && (ptr_reg->off - smin_val ==
@@ -2510,28 +2568,30 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
2510 case BPF_AND: 2568 case BPF_AND:
2511 case BPF_OR: 2569 case BPF_OR:
2512 case BPF_XOR: 2570 case BPF_XOR:
2513 /* bitwise ops on pointers are troublesome, prohibit for now. 2571 /* bitwise ops on pointers are troublesome, prohibit. */
2514 * (However, in principle we could allow some cases, e.g. 2572 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
2515 * ptr &= ~3 which would reduce min_value by 3.) 2573 dst, bpf_alu_string[opcode >> 4]);
2516 */
2517 if (!env->allow_ptr_leaks)
2518 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
2519 dst, bpf_alu_string[opcode >> 4]);
2520 return -EACCES; 2574 return -EACCES;
2521 default: 2575 default:
2522 /* other operators (e.g. MUL,LSH) produce non-pointer results */ 2576 /* other operators (e.g. MUL,LSH) produce non-pointer results */
2523 if (!env->allow_ptr_leaks) 2577 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
2524 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", 2578 dst, bpf_alu_string[opcode >> 4]);
2525 dst, bpf_alu_string[opcode >> 4]);
2526 return -EACCES; 2579 return -EACCES;
2527 } 2580 }
2528 2581
2582 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
2583 return -EINVAL;
2584
2529 __update_reg_bounds(dst_reg); 2585 __update_reg_bounds(dst_reg);
2530 __reg_deduce_bounds(dst_reg); 2586 __reg_deduce_bounds(dst_reg);
2531 __reg_bound_offset(dst_reg); 2587 __reg_bound_offset(dst_reg);
2532 return 0; 2588 return 0;
2533} 2589}
2534 2590
2591/* WARNING: This function does calculations on 64-bit values, but the actual
2592 * execution may occur on 32-bit values. Therefore, things like bitshifts
2593 * need extra checks in the 32-bit case.
2594 */
2535static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, 2595static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2536 struct bpf_insn *insn, 2596 struct bpf_insn *insn,
2537 struct bpf_reg_state *dst_reg, 2597 struct bpf_reg_state *dst_reg,
@@ -2542,12 +2602,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2542 bool src_known, dst_known; 2602 bool src_known, dst_known;
2543 s64 smin_val, smax_val; 2603 s64 smin_val, smax_val;
2544 u64 umin_val, umax_val; 2604 u64 umin_val, umax_val;
2605 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
2545 2606
2546 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2547 /* 32-bit ALU ops are (32,32)->64 */
2548 coerce_reg_to_32(dst_reg);
2549 coerce_reg_to_32(&src_reg);
2550 }
2551 smin_val = src_reg.smin_value; 2607 smin_val = src_reg.smin_value;
2552 smax_val = src_reg.smax_value; 2608 smax_val = src_reg.smax_value;
2553 umin_val = src_reg.umin_value; 2609 umin_val = src_reg.umin_value;
@@ -2555,6 +2611,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2555 src_known = tnum_is_const(src_reg.var_off); 2611 src_known = tnum_is_const(src_reg.var_off);
2556 dst_known = tnum_is_const(dst_reg->var_off); 2612 dst_known = tnum_is_const(dst_reg->var_off);
2557 2613
2614 if (!src_known &&
2615 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
2616 __mark_reg_unknown(dst_reg);
2617 return 0;
2618 }
2619
2558 switch (opcode) { 2620 switch (opcode) {
2559 case BPF_ADD: 2621 case BPF_ADD:
2560 if (signed_add_overflows(dst_reg->smin_value, smin_val) || 2622 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
@@ -2683,9 +2745,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2683 __update_reg_bounds(dst_reg); 2745 __update_reg_bounds(dst_reg);
2684 break; 2746 break;
2685 case BPF_LSH: 2747 case BPF_LSH:
2686 if (umax_val > 63) { 2748 if (umax_val >= insn_bitness) {
2687 /* Shifts greater than 63 are undefined. This includes 2749 /* Shifts greater than 31 or 63 are undefined.
2688 * shifts by a negative number. 2750 * This includes shifts by a negative number.
2689 */ 2751 */
2690 mark_reg_unknown(env, regs, insn->dst_reg); 2752 mark_reg_unknown(env, regs, insn->dst_reg);
2691 break; 2753 break;
@@ -2711,27 +2773,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2711 __update_reg_bounds(dst_reg); 2773 __update_reg_bounds(dst_reg);
2712 break; 2774 break;
2713 case BPF_RSH: 2775 case BPF_RSH:
2714 if (umax_val > 63) { 2776 if (umax_val >= insn_bitness) {
2715 /* Shifts greater than 63 are undefined. This includes 2777 /* Shifts greater than 31 or 63 are undefined.
2716 * shifts by a negative number. 2778 * This includes shifts by a negative number.
2717 */ 2779 */
2718 mark_reg_unknown(env, regs, insn->dst_reg); 2780 mark_reg_unknown(env, regs, insn->dst_reg);
2719 break; 2781 break;
2720 } 2782 }
2721 /* BPF_RSH is an unsigned shift, so make the appropriate casts */ 2783 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
2722 if (dst_reg->smin_value < 0) { 2784 * be negative, then either:
2723 if (umin_val) { 2785 * 1) src_reg might be zero, so the sign bit of the result is
2724 /* Sign bit will be cleared */ 2786 * unknown, so we lose our signed bounds
2725 dst_reg->smin_value = 0; 2787 * 2) it's known negative, thus the unsigned bounds capture the
2726 } else { 2788 * signed bounds
2727 /* Lost sign bit information */ 2789 * 3) the signed bounds cross zero, so they tell us nothing
2728 dst_reg->smin_value = S64_MIN; 2790 * about the result
2729 dst_reg->smax_value = S64_MAX; 2791 * If the value in dst_reg is known nonnegative, then again the
2730 } 2792 * unsigned bounds capture the signed bounds
2731 } else { 2793 * Thus, in all cases it suffices to blow away our signed bounds
2732 dst_reg->smin_value = 2794 * and rely on inferring new ones from the unsigned bounds and
2733 (u64)(dst_reg->smin_value) >> umax_val; 2795 * var_off of the result.
2734 } 2796 */
2797 dst_reg->smin_value = S64_MIN;
2798 dst_reg->smax_value = S64_MAX;
2735 if (src_known) 2799 if (src_known)
2736 dst_reg->var_off = tnum_rshift(dst_reg->var_off, 2800 dst_reg->var_off = tnum_rshift(dst_reg->var_off,
2737 umin_val); 2801 umin_val);
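
To see why the signed bounds are simply reset here rather than shifted, consider a dst_reg that may hold -4 with a shift amount that may be zero; this is case 1) in the comment above. Standalone arithmetic (not verifier code):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                int64_t v = -4;

                /* BPF_RSH is a logical shift on the 64-bit pattern */
                printf("v >> 0 = %lld\n",
                       (long long)(int64_t)((uint64_t)v >> 0)); /* -4: still negative */
                printf("v >> 1 = %lld\n",
                       (long long)(int64_t)((uint64_t)v >> 1)); /* 9223372036854775806: positive */
                return 0;
        }

Since the result can land on either side of zero, any smin/smax carried over from the old signed bounds could be wrong; the verifier instead rebuilds them from the unsigned bounds and var_off.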
@@ -2747,6 +2811,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2747 break; 2811 break;
2748 } 2812 }
2749 2813
2814 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2815 /* 32-bit ALU ops are (32,32)->32 */
2816 coerce_reg_to_size(dst_reg, 4);
2817 coerce_reg_to_size(&src_reg, 4);
2818 }
2819
2750 __reg_deduce_bounds(dst_reg); 2820 __reg_deduce_bounds(dst_reg);
2751 __reg_bound_offset(dst_reg); 2821 __reg_bound_offset(dst_reg);
2752 return 0; 2822 return 0;
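
For ALU (non-ALU64) opcodes the bounds are now computed on the 64-bit representation first and only then truncated, which matches the zero-extending semantics of 32-bit BPF ALU ops. A standalone sketch of why the truncation is needed (illustrative, not verifier code):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint64_t dst = 0xffffffff, src = 1;

                uint64_t tracked = dst + src;         /* what 64-bit tracking computes */
                uint32_t actual  = (uint32_t)tracked; /* what a 32-bit BPF ADD leaves behind */

                printf("tracked %#llx, actual %#x\n",
                       (unsigned long long)tracked, actual); /* 0x100000000 vs 0x0 */
                return 0;
        }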
@@ -2763,7 +2833,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2763 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; 2833 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
2764 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; 2834 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
2765 u8 opcode = BPF_OP(insn->code); 2835 u8 opcode = BPF_OP(insn->code);
2766 int rc;
2767 2836
2768 dst_reg = &regs[insn->dst_reg]; 2837 dst_reg = &regs[insn->dst_reg];
2769 src_reg = NULL; 2838 src_reg = NULL;
@@ -2774,43 +2843,29 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2774 if (src_reg->type != SCALAR_VALUE) { 2843 if (src_reg->type != SCALAR_VALUE) {
2775 if (dst_reg->type != SCALAR_VALUE) { 2844 if (dst_reg->type != SCALAR_VALUE) {
2776 /* Combining two pointers by any ALU op yields 2845 /* Combining two pointers by any ALU op yields
2777 * an arbitrary scalar. 2846 * an arbitrary scalar. Disallow all math except
2847 * pointer subtraction
2778 */ 2848 */
2779 if (!env->allow_ptr_leaks) { 2849 if (opcode == BPF_SUB){
2780 verbose(env, "R%d pointer %s pointer prohibited\n", 2850 mark_reg_unknown(env, regs, insn->dst_reg);
2781 insn->dst_reg, 2851 return 0;
2782 bpf_alu_string[opcode >> 4]);
2783 return -EACCES;
2784 } 2852 }
2785 mark_reg_unknown(env, regs, insn->dst_reg); 2853 verbose(env, "R%d pointer %s pointer prohibited\n",
2786 return 0; 2854 insn->dst_reg,
2855 bpf_alu_string[opcode >> 4]);
2856 return -EACCES;
2787 } else { 2857 } else {
2788 /* scalar += pointer 2858 /* scalar += pointer
2789 * This is legal, but we have to reverse our 2859 * This is legal, but we have to reverse our
2790 * src/dest handling in computing the range 2860 * src/dest handling in computing the range
2791 */ 2861 */
2792 rc = adjust_ptr_min_max_vals(env, insn, 2862 return adjust_ptr_min_max_vals(env, insn,
2793 src_reg, dst_reg); 2863 src_reg, dst_reg);
2794 if (rc == -EACCES && env->allow_ptr_leaks) {
2795 /* scalar += unknown scalar */
2796 __mark_reg_unknown(&off_reg);
2797 return adjust_scalar_min_max_vals(
2798 env, insn,
2799 dst_reg, off_reg);
2800 }
2801 return rc;
2802 } 2864 }
2803 } else if (ptr_reg) { 2865 } else if (ptr_reg) {
2804 /* pointer += scalar */ 2866 /* pointer += scalar */
2805 rc = adjust_ptr_min_max_vals(env, insn, 2867 return adjust_ptr_min_max_vals(env, insn,
2806 dst_reg, src_reg); 2868 dst_reg, src_reg);
2807 if (rc == -EACCES && env->allow_ptr_leaks) {
2808 /* unknown scalar += scalar */
2809 __mark_reg_unknown(dst_reg);
2810 return adjust_scalar_min_max_vals(
2811 env, insn, dst_reg, *src_reg);
2812 }
2813 return rc;
2814 } 2869 }
2815 } else { 2870 } else {
2816 /* Pretend the src is a reg with a known value, since we only 2871 /* Pretend the src is a reg with a known value, since we only
@@ -2819,17 +2874,9 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2819 off_reg.type = SCALAR_VALUE; 2874 off_reg.type = SCALAR_VALUE;
2820 __mark_reg_known(&off_reg, insn->imm); 2875 __mark_reg_known(&off_reg, insn->imm);
2821 src_reg = &off_reg; 2876 src_reg = &off_reg;
2822 if (ptr_reg) { /* pointer += K */ 2877 if (ptr_reg) /* pointer += K */
2823 rc = adjust_ptr_min_max_vals(env, insn, 2878 return adjust_ptr_min_max_vals(env, insn,
2824 ptr_reg, src_reg); 2879 ptr_reg, src_reg);
2825 if (rc == -EACCES && env->allow_ptr_leaks) {
2826 /* unknown scalar += K */
2827 __mark_reg_unknown(dst_reg);
2828 return adjust_scalar_min_max_vals(
2829 env, insn, dst_reg, off_reg);
2830 }
2831 return rc;
2832 }
2833 } 2880 }
2834 2881
2835 /* Got here implies adding two SCALAR_VALUEs */ 2882 /* Got here implies adding two SCALAR_VALUEs */
@@ -2926,17 +2973,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
2926 return -EACCES; 2973 return -EACCES;
2927 } 2974 }
2928 mark_reg_unknown(env, regs, insn->dst_reg); 2975 mark_reg_unknown(env, regs, insn->dst_reg);
2929 /* high 32 bits are known zero. */ 2976 coerce_reg_to_size(&regs[insn->dst_reg], 4);
2930 regs[insn->dst_reg].var_off = tnum_cast(
2931 regs[insn->dst_reg].var_off, 4);
2932 __update_reg_bounds(&regs[insn->dst_reg]);
2933 } 2977 }
2934 } else { 2978 } else {
2935 /* case: R = imm 2979 /* case: R = imm
2936 * remember the value we stored into this reg 2980 * remember the value we stored into this reg
2937 */ 2981 */
2938 regs[insn->dst_reg].type = SCALAR_VALUE; 2982 regs[insn->dst_reg].type = SCALAR_VALUE;
2939 __mark_reg_known(regs + insn->dst_reg, insn->imm); 2983 if (BPF_CLASS(insn->code) == BPF_ALU64) {
2984 __mark_reg_known(regs + insn->dst_reg,
2985 insn->imm);
2986 } else {
2987 __mark_reg_known(regs + insn->dst_reg,
2988 (u32)insn->imm);
2989 }
2940 } 2990 }
2941 2991
2942 } else if (opcode > BPF_END) { 2992 } else if (opcode > BPF_END) {
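
The split above matters for negative immediates: a 64-bit MOV sign-extends the 32-bit imm while a 32-bit MOV zero-extends it, so recording the sign-extended value for ALU32 would give the verifier the wrong known constant. Standalone arithmetic for imm = -1 (illustrative only):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                int32_t imm = -1;

                printf("BPF_ALU64 mov: %lld\n",
                       (long long)(int64_t)imm);             /* -1 */
                printf("BPF_ALU   mov: %llu\n",
                       (unsigned long long)(uint32_t)imm);   /* 4294967295 */
                return 0;
        }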
@@ -4013,15 +4063,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
4013 return range_within(rold, rcur) && 4063 return range_within(rold, rcur) &&
4014 tnum_in(rold->var_off, rcur->var_off); 4064 tnum_in(rold->var_off, rcur->var_off);
4015 } else { 4065 } else {
4016 /* if we knew anything about the old value, we're not 4066 /* We're trying to use a pointer in place of a scalar.
4017 * equal, because we can't know anything about the 4067 * Even if the scalar was unbounded, this could lead to
4018 * scalar value of the pointer in the new value. 4068 * pointer leaks because scalars are allowed to leak
4069 * while pointers are not. We could make this safe in
4070 * special cases if root is calling us, but it's
4071 * probably not worth the hassle.
4019 */ 4072 */
4020 return rold->umin_value == 0 && 4073 return false;
4021 rold->umax_value == U64_MAX &&
4022 rold->smin_value == S64_MIN &&
4023 rold->smax_value == S64_MAX &&
4024 tnum_is_unknown(rold->var_off);
4025 } 4074 }
4026 case PTR_TO_MAP_VALUE: 4075 case PTR_TO_MAP_VALUE:
4027 /* If the new min/max/var_off satisfy the old ones and 4076 /* If the new min/max/var_off satisfy the old ones and
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 13d6881f908b..ec999f32c840 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -434,17 +434,22 @@ static struct pid *good_sigevent(sigevent_t * event)
434{ 434{
435 struct task_struct *rtn = current->group_leader; 435 struct task_struct *rtn = current->group_leader;
436 436
437 if ((event->sigev_notify & SIGEV_THREAD_ID ) && 437 switch (event->sigev_notify) {
438 (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || 438 case SIGEV_SIGNAL | SIGEV_THREAD_ID:
439 !same_thread_group(rtn, current) || 439 rtn = find_task_by_vpid(event->sigev_notify_thread_id);
440 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) 440 if (!rtn || !same_thread_group(rtn, current))
441 return NULL;
442 /* FALLTHRU */
443 case SIGEV_SIGNAL:
444 case SIGEV_THREAD:
445 if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
446 return NULL;
447 /* FALLTHRU */
448 case SIGEV_NONE:
449 return task_pid(rtn);
450 default:
441 return NULL; 451 return NULL;
442 452 }
443 if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
444 ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
445 return NULL;
446
447 return task_pid(rtn);
448} 453}
449 454
450static struct k_itimer * alloc_posix_timer(void) 455static struct k_itimer * alloc_posix_timer(void)
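
The rewritten good_sigevent() now accepts exactly SIGEV_NONE, SIGEV_SIGNAL, SIGEV_THREAD and SIGEV_SIGNAL | SIGEV_THREAD_ID (the latter only with a tid in the caller's thread group), and checks sigev_signo for the signalling cases; anything else falls into the default arm and fails. A minimal userspace sketch that passes these checks with plain SIGEV_SIGNAL (error handling kept short; link with -lrt on older glibc):

        #include <signal.h>
        #include <stdio.h>
        #include <time.h>

        int main(void)
        {
                struct sigevent sev = {
                        .sigev_notify = SIGEV_SIGNAL,
                        .sigev_signo  = SIGRTMIN,  /* 0 < signo <= SIGRTMAX, as checked above */
                };
                timer_t timerid;

                if (timer_create(CLOCK_MONOTONIC, &sev, &timerid) == -1) {
                        perror("timer_create");
                        return 1;
                }
                printf("timer created\n");
                timer_delete(timerid);
                return 0;
        }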
@@ -669,7 +674,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
669 struct timespec64 ts64; 674 struct timespec64 ts64;
670 bool sig_none; 675 bool sig_none;
671 676
672 sig_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE; 677 sig_none = timr->it_sigev_notify == SIGEV_NONE;
673 iv = timr->it_interval; 678 iv = timr->it_interval;
674 679
675 /* interval timer ? */ 680 /* interval timer ? */
@@ -856,7 +861,7 @@ int common_timer_set(struct k_itimer *timr, int flags,
856 861
857 timr->it_interval = timespec64_to_ktime(new_setting->it_interval); 862 timr->it_interval = timespec64_to_ktime(new_setting->it_interval);
858 expires = timespec64_to_ktime(new_setting->it_value); 863 expires = timespec64_to_ktime(new_setting->it_value);
859 sigev_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE; 864 sigev_none = timr->it_sigev_notify == SIGEV_NONE;
860 865
861 kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none); 866 kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
862 timr->it_active = !sigev_none; 867 timr->it_active = !sigev_none;
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index aa8812ae6776..9e9748089270 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -435,6 +435,41 @@ loop:
435 return 0; 435 return 0;
436} 436}
437 437
438static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self)
439{
440 struct bpf_insn *insn;
441
442 insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL);
443 if (!insn)
444 return -ENOMEM;
445
446 /* Due to func address being non-const, we need to
447 * assemble this here.
448 */
449 insn[0] = BPF_MOV64_REG(R6, R1);
450 insn[1] = BPF_LD_ABS(BPF_B, 0);
451 insn[2] = BPF_LD_ABS(BPF_H, 0);
452 insn[3] = BPF_LD_ABS(BPF_W, 0);
453 insn[4] = BPF_MOV64_REG(R7, R6);
454 insn[5] = BPF_MOV64_IMM(R6, 0);
455 insn[6] = BPF_MOV64_REG(R1, R7);
456 insn[7] = BPF_MOV64_IMM(R2, 1);
457 insn[8] = BPF_MOV64_IMM(R3, 2);
458 insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
459 bpf_skb_vlan_push_proto.func - __bpf_call_base);
460 insn[10] = BPF_MOV64_REG(R6, R7);
461 insn[11] = BPF_LD_ABS(BPF_B, 0);
462 insn[12] = BPF_LD_ABS(BPF_H, 0);
463 insn[13] = BPF_LD_ABS(BPF_W, 0);
464 insn[14] = BPF_MOV64_IMM(R0, 42);
465 insn[15] = BPF_EXIT_INSN();
466
467 self->u.ptr.insns = insn;
468 self->u.ptr.len = 16;
469
470 return 0;
471}
472
438static int bpf_fill_jump_around_ld_abs(struct bpf_test *self) 473static int bpf_fill_jump_around_ld_abs(struct bpf_test *self)
439{ 474{
440 unsigned int len = BPF_MAXINSNS; 475 unsigned int len = BPF_MAXINSNS;
@@ -6066,6 +6101,14 @@ static struct bpf_test tests[] = {
6066 {}, 6101 {},
6067 { {0x1, 0x42 } }, 6102 { {0x1, 0x42 } },
6068 }, 6103 },
6104 {
6105 "LD_ABS with helper changing skb data",
6106 { },
6107 INTERNAL,
6108 { 0x34 },
6109 { { ETH_HLEN, 42 } },
6110 .fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
6111 },
6069}; 6112};
6070 6113
6071static struct net_device dev; 6114static struct net_device dev;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 84b2dc76f140..b5f940ce0143 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -882,13 +882,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
882 if (IS_ERR(dev)) 882 if (IS_ERR(dev))
883 return PTR_ERR(dev); 883 return PTR_ERR(dev);
884 884
885 if (bdi_debug_register(bdi, dev_name(dev))) {
886 device_destroy(bdi_class, dev->devt);
887 return -ENOMEM;
888 }
889 cgwb_bdi_register(bdi); 885 cgwb_bdi_register(bdi);
890 bdi->dev = dev; 886 bdi->dev = dev;
891 887
888 bdi_debug_register(bdi, dev_name(dev));
892 set_bit(WB_registered, &bdi->wb.state); 889 set_bit(WB_registered, &bdi->wb.state);
893 890
894 spin_lock_bh(&bdi_lock); 891 spin_lock_bh(&bdi_lock);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index d0ef0a8e8831..015f465c514b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
1262 struct net_bridge *br = netdev_priv(dev); 1262 struct net_bridge *br = netdev_priv(dev);
1263 int err; 1263 int err;
1264 1264
1265 err = register_netdevice(dev);
1266 if (err)
1267 return err;
1268
1265 if (tb[IFLA_ADDRESS]) { 1269 if (tb[IFLA_ADDRESS]) {
1266 spin_lock_bh(&br->lock); 1270 spin_lock_bh(&br->lock);
1267 br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); 1271 br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
1268 spin_unlock_bh(&br->lock); 1272 spin_unlock_bh(&br->lock);
1269 } 1273 }
1270 1274
1271 err = register_netdevice(dev);
1272 if (err)
1273 return err;
1274
1275 err = br_changelink(dev, tb, data, extack); 1275 err = br_changelink(dev, tb, data, extack);
1276 if (err) 1276 if (err)
1277 unregister_netdevice(dev); 1277 br_dev_delete(dev, NULL);
1278
1278 return err; 1279 return err;
1279} 1280}
1280 1281
diff --git a/net/core/dev.c b/net/core/dev.c
index c7db39926769..59ead3910ab7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3937,7 +3937,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3937 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, 3937 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
3938 troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) 3938 troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
3939 goto do_drop; 3939 goto do_drop;
3940 if (troom > 0 && __skb_linearize(skb)) 3940 if (skb_linearize(skb))
3941 goto do_drop; 3941 goto do_drop;
3942 } 3942 }
3943 3943
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b797832565d3..60a71be75aea 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
267 spin_lock_bh(&net->nsid_lock); 267 spin_lock_bh(&net->nsid_lock);
268 peer = idr_find(&net->netns_ids, id); 268 peer = idr_find(&net->netns_ids, id);
269 if (peer) 269 if (peer)
270 get_net(peer); 270 peer = maybe_get_net(peer);
271 spin_unlock_bh(&net->nsid_lock); 271 spin_unlock_bh(&net->nsid_lock);
272 rcu_read_unlock(); 272 rcu_read_unlock();
273 273
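
get_net() unconditionally takes a reference, so looking a peer up by id could resurrect a struct net whose refcount had already dropped to zero; maybe_get_net() only succeeds while the count is still non-zero. A generic userspace sketch of that pattern (C11 atomics; deliberately not the kernel helper's actual body):

        #include <stdatomic.h>
        #include <stdbool.h>
        #include <stdio.h>

        struct obj { atomic_int refs; };

        static bool maybe_get(struct obj *o)
        {
                int r = atomic_load(&o->refs);

                while (r != 0) {
                        if (atomic_compare_exchange_weak(&o->refs, &r, r + 1))
                                return true;   /* reference taken */
                }
                return false;                  /* object already dying; caller sees NULL */
        }

        int main(void)
        {
                struct obj live = { 1 }, dying = { 0 };

                printf("live:  %s\n", maybe_get(&live)  ? "got ref" : "refused");
                printf("dying: %s\n", maybe_get(&dying) ? "got ref" : "refused");
                return 0;
        }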
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a592ca025fc4..a3cb0be4c6f3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1178,7 +1178,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
1178 u32 d_off; 1178 u32 d_off;
1179 1179
1180 if (!num_frags) 1180 if (!num_frags)
1181 return 0; 1181 goto release;
1182 1182
1183 if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) 1183 if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
1184 return -EINVAL; 1184 return -EINVAL;
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
1238 __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); 1238 __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
1239 skb_shinfo(skb)->nr_frags = new_frags; 1239 skb_shinfo(skb)->nr_frags = new_frags;
1240 1240
1241release:
1241 skb_zcopy_clear(skb, false); 1242 skb_zcopy_clear(skb, false);
1242 return 0; 1243 return 0;
1243} 1244}
@@ -3654,8 +3655,6 @@ normal:
3654 3655
3655 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & 3656 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
3656 SKBTX_SHARED_FRAG; 3657 SKBTX_SHARED_FRAG;
3657 if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
3658 goto err;
3659 3658
3660 while (pos < offset + len) { 3659 while (pos < offset + len) {
3661 if (i >= nfrags) { 3660 if (i >= nfrags) {
@@ -3681,6 +3680,8 @@ normal:
3681 3680
3682 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) 3681 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
3683 goto err; 3682 goto err;
3683 if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
3684 goto err;
3684 3685
3685 *nskb_frag = *frag; 3686 *nskb_frag = *frag;
3686 __skb_frag_ref(nskb_frag); 3687 __skb_frag_ref(nskb_frag);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f52d27a422c3..08259d078b1c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1298,14 +1298,19 @@ err_table_hash_alloc:
1298 1298
1299static void ip_fib_net_exit(struct net *net) 1299static void ip_fib_net_exit(struct net *net)
1300{ 1300{
1301 unsigned int i; 1301 int i;
1302 1302
1303 rtnl_lock(); 1303 rtnl_lock();
1304#ifdef CONFIG_IP_MULTIPLE_TABLES 1304#ifdef CONFIG_IP_MULTIPLE_TABLES
1305 RCU_INIT_POINTER(net->ipv4.fib_main, NULL); 1305 RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
1306 RCU_INIT_POINTER(net->ipv4.fib_default, NULL); 1306 RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
1307#endif 1307#endif
1308 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1308 /* Destroy the tables in reverse order to guarantee that the
1309 * local table, ID 255, is destroyed before the main table, ID
1310 * 254. This is necessary as the local table may contain
1311 * references to data contained in the main table.
1312 */
1313 for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
1309 struct hlist_head *head = &net->ipv4.fib_table_hash[i]; 1314 struct hlist_head *head = &net->ipv4.fib_table_hash[i];
1310 struct hlist_node *tmp; 1315 struct hlist_node *tmp;
1311 struct fib_table *tb; 1316 struct fib_table *tb;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f04d944f8abe..c586597da20d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
698 698
699 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 699 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
700 int type = nla_type(nla); 700 int type = nla_type(nla);
701 u32 val; 701 u32 fi_val, val;
702 702
703 if (!type) 703 if (!type)
704 continue; 704 continue;
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
715 val = nla_get_u32(nla); 715 val = nla_get_u32(nla);
716 } 716 }
717 717
718 if (fi->fib_metrics->metrics[type - 1] != val) 718 fi_val = fi->fib_metrics->metrics[type - 1];
719 if (type == RTAX_FEATURES)
720 fi_val &= ~DST_FEATURE_ECN_CA;
721
722 if (fi_val != val)
719 return false; 723 return false;
720 } 724 }
721 725
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 90c912307814..78365094f56c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1336,6 +1336,7 @@ static const struct net_device_ops erspan_netdev_ops = {
1336static void ipgre_tap_setup(struct net_device *dev) 1336static void ipgre_tap_setup(struct net_device *dev)
1337{ 1337{
1338 ether_setup(dev); 1338 ether_setup(dev);
1339 dev->max_mtu = 0;
1339 dev->netdev_ops = &gre_tap_netdev_ops; 1340 dev->netdev_ops = &gre_tap_netdev_ops;
1340 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1341 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1341 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1342 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c26f71234b9c..c9441ca45399 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,6 @@ lookup_protocol:
210 np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; 210 np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
211 np->mc_loop = 1; 211 np->mc_loop = 1;
212 np->pmtudisc = IPV6_PMTUDISC_WANT; 212 np->pmtudisc = IPV6_PMTUDISC_WANT;
213 np->autoflowlabel = ip6_default_np_autolabel(net);
214 np->repflow = net->ipv6.sysctl.flowlabel_reflect; 213 np->repflow = net->ipv6.sysctl.flowlabel_reflect;
215 sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; 214 sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
216 215
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 8451d00b210b..97f148f15429 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1772,6 +1772,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
1772 1772
1773 ether_setup(dev); 1773 ether_setup(dev);
1774 1774
1775 dev->max_mtu = 0;
1775 dev->netdev_ops = &ip6gre_tap_netdev_ops; 1776 dev->netdev_ops = &ip6gre_tap_netdev_ops;
1776 dev->needs_free_netdev = true; 1777 dev->needs_free_netdev = true;
1777 dev->priv_destructor = ip6gre_dev_free; 1778 dev->priv_destructor = ip6gre_dev_free;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 176d74fb3b4d..ece2781a31b2 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
166 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 166 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
167} 167}
168 168
169static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
170{
171 if (!np->autoflowlabel_set)
172 return ip6_default_np_autolabel(net);
173 else
174 return np->autoflowlabel;
175}
176
169/* 177/*
170 * xmit an sk_buff (used by TCP, SCTP and DCCP) 178 * xmit an sk_buff (used by TCP, SCTP and DCCP)
171 * Note : socket lock is not held for SYNACK packets, but might be modified 179 * Note : socket lock is not held for SYNACK packets, but might be modified
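
With np->autoflowlabel no longer seeded at socket creation (see the af_inet6.c hunk above), ip6_autoflowlabel() consults the net.ipv6.auto_flowlabels sysctl at transmit time unless the socket has explicitly set the option, which is what autoflowlabel_set records. A hedged userspace sketch of opting in per socket (the numeric fallback for IPV6_AUTOFLOWLABEL is an assumption of this note, taken from include/uapi/linux/in6.h):

        #include <netinet/in.h>
        #include <stdio.h>
        #include <sys/socket.h>

        #ifndef IPV6_AUTOFLOWLABEL
        #define IPV6_AUTOFLOWLABEL 70  /* from include/uapi/linux/in6.h */
        #endif

        int main(void)
        {
                int one = 1;
                int fd = socket(AF_INET6, SOCK_STREAM, 0);

                if (fd < 0) {
                        perror("socket");
                        return 1;
                }
                /* From here on, ip6_autoflowlabel() uses the per-socket value
                 * instead of the sysctl for this socket. */
                if (setsockopt(fd, IPPROTO_IPV6, IPV6_AUTOFLOWLABEL, &one, sizeof(one)) < 0)
                        perror("setsockopt");
                return 0;
        }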
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
230 hlimit = ip6_dst_hoplimit(dst); 238 hlimit = ip6_dst_hoplimit(dst);
231 239
232 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, 240 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
233 np->autoflowlabel, fl6)); 241 ip6_autoflowlabel(net, np), fl6));
234 242
235 hdr->payload_len = htons(seg_len); 243 hdr->payload_len = htons(seg_len);
236 hdr->nexthdr = proto; 244 hdr->nexthdr = proto;
@@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
1626 1634
1627 ip6_flow_hdr(hdr, v6_cork->tclass, 1635 ip6_flow_hdr(hdr, v6_cork->tclass,
1628 ip6_make_flowlabel(net, skb, fl6->flowlabel, 1636 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1629 np->autoflowlabel, fl6)); 1637 ip6_autoflowlabel(net, np), fl6));
1630 hdr->hop_limit = v6_cork->hop_limit; 1638 hdr->hop_limit = v6_cork->hop_limit;
1631 hdr->nexthdr = proto; 1639 hdr->nexthdr = proto;
1632 hdr->saddr = fl6->saddr; 1640 hdr->saddr = fl6->saddr;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6ff2f21ae3fc..8a4610e84e58 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1126,8 +1126,13 @@ route_lookup:
1126 max_headroom += 8; 1126 max_headroom += 8;
1127 mtu -= 8; 1127 mtu -= 8;
1128 } 1128 }
1129 if (mtu < IPV6_MIN_MTU) 1129 if (skb->protocol == htons(ETH_P_IPV6)) {
1130 mtu = IPV6_MIN_MTU; 1130 if (mtu < IPV6_MIN_MTU)
1131 mtu = IPV6_MIN_MTU;
1132 } else if (mtu < 576) {
1133 mtu = 576;
1134 }
1135
1131 if (skb_dst(skb) && !t->parms.collect_md) 1136 if (skb_dst(skb) && !t->parms.collect_md)
1132 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 1137 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
1133 if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { 1138 if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b9404feabd78..2d4680e0376f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -886,6 +886,7 @@ pref_skip_coa:
886 break; 886 break;
887 case IPV6_AUTOFLOWLABEL: 887 case IPV6_AUTOFLOWLABEL:
888 np->autoflowlabel = valbool; 888 np->autoflowlabel = valbool;
889 np->autoflowlabel_set = 1;
889 retv = 0; 890 retv = 0;
890 break; 891 break;
891 case IPV6_RECVFRAGSIZE: 892 case IPV6_RECVFRAGSIZE:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b3f4d19b3ca5..2490280b3394 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2336 } 2336 }
2337 2337
2338 rt->dst.flags |= DST_HOST; 2338 rt->dst.flags |= DST_HOST;
2339 rt->dst.input = ip6_input;
2339 rt->dst.output = ip6_output; 2340 rt->dst.output = ip6_output;
2340 rt->rt6i_gateway = fl6->daddr; 2341 rt->rt6i_gateway = fl6->daddr;
2341 rt->rt6i_dst.addr = fl6->daddr; 2342 rt->rt6i_dst.addr = fl6->daddr;
@@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4297 if (!ipv6_addr_any(&fl6.saddr)) 4298 if (!ipv6_addr_any(&fl6.saddr))
4298 flags |= RT6_LOOKUP_F_HAS_SADDR; 4299 flags |= RT6_LOOKUP_F_HAS_SADDR;
4299 4300
4300 if (!fibmatch) 4301 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
4301 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
4302 else
4303 dst = ip6_route_lookup(net, &fl6, 0);
4304 4302
4305 rcu_read_unlock(); 4303 rcu_read_unlock();
4306 } else { 4304 } else {
4307 fl6.flowi6_oif = oif; 4305 fl6.flowi6_oif = oif;
4308 4306
4309 if (!fibmatch) 4307 dst = ip6_route_output(net, NULL, &fl6);
4310 dst = ip6_route_output(net, NULL, &fl6);
4311 else
4312 dst = ip6_route_lookup(net, &fl6, 0);
4313 } 4308 }
4314 4309
4315 4310
@@ -4326,6 +4321,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4326 goto errout; 4321 goto errout;
4327 } 4322 }
4328 4323
4324 if (fibmatch && rt->from) {
4325 struct rt6_info *ort = rt->from;
4326
4327 dst_hold(&ort->dst);
4328 ip6_rt_put(rt);
4329 rt = ort;
4330 }
4331
4329 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 4332 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
4330 if (!skb) { 4333 if (!skb) {
4331 ip6_rt_put(rt); 4334 ip6_rt_put(rt);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 76d050aba7a4..56b8e7167790 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
579 return -EINVAL; 579 return -EINVAL;
580 580
581 skb_reset_network_header(skb); 581 skb_reset_network_header(skb);
582 key->eth.type = skb->protocol;
582 } else { 583 } else {
583 eth = eth_hdr(skb); 584 eth = eth_hdr(skb);
584 ether_addr_copy(key->eth.src, eth->h_source); 585 ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
592 if (unlikely(parse_vlan(skb, key))) 593 if (unlikely(parse_vlan(skb, key)))
593 return -ENOMEM; 594 return -ENOMEM;
594 595
595 skb->protocol = parse_ethertype(skb); 596 key->eth.type = parse_ethertype(skb);
596 if (unlikely(skb->protocol == htons(0))) 597 if (unlikely(key->eth.type == htons(0)))
597 return -ENOMEM; 598 return -ENOMEM;
598 599
600 /* Multiple tagged packets need to retain TPID to satisfy
601 * skb_vlan_pop(), which will later shift the ethertype into
602 * skb->protocol.
603 */
604 if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
605 skb->protocol = key->eth.cvlan.tpid;
606 else
607 skb->protocol = key->eth.type;
608
599 skb_reset_network_header(skb); 609 skb_reset_network_header(skb);
600 __skb_push(skb, skb->data - skb_mac_header(skb)); 610 __skb_push(skb, skb->data - skb_mac_header(skb));
601 } 611 }
602 skb_reset_mac_len(skb); 612 skb_reset_mac_len(skb);
603 key->eth.type = skb->protocol;
604 613
605 /* Network layer. */ 614 /* Network layer. */
606 if (key->eth.type == htons(ETH_P_IP)) { 615 if (key->eth.type == htons(ETH_P_IP)) {
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6fe798c2df1a..8d78e7f4ecc3 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
42 struct list_head link; 42 struct list_head link;
43 struct tcf_result res; 43 struct tcf_result res;
44 bool exts_integrated; 44 bool exts_integrated;
45 bool offloaded;
46 u32 gen_flags; 45 u32 gen_flags;
47 struct tcf_exts exts; 46 struct tcf_exts exts;
48 u32 handle; 47 u32 handle;
@@ -148,33 +147,37 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
148} 147}
149 148
150static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, 149static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
151 enum tc_clsbpf_command cmd) 150 struct cls_bpf_prog *oldprog)
152{ 151{
153 bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
154 struct tcf_block *block = tp->chain->block; 152 struct tcf_block *block = tp->chain->block;
155 bool skip_sw = tc_skip_sw(prog->gen_flags);
156 struct tc_cls_bpf_offload cls_bpf = {}; 153 struct tc_cls_bpf_offload cls_bpf = {};
154 struct cls_bpf_prog *obj;
155 bool skip_sw;
157 int err; 156 int err;
158 157
158 skip_sw = prog && tc_skip_sw(prog->gen_flags);
159 obj = prog ?: oldprog;
160
159 tc_cls_common_offload_init(&cls_bpf.common, tp); 161 tc_cls_common_offload_init(&cls_bpf.common, tp);
160 cls_bpf.command = cmd; 162 cls_bpf.command = TC_CLSBPF_OFFLOAD;
161 cls_bpf.exts = &prog->exts; 163 cls_bpf.exts = &obj->exts;
162 cls_bpf.prog = prog->filter; 164 cls_bpf.prog = prog ? prog->filter : NULL;
163 cls_bpf.name = prog->bpf_name; 165 cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
164 cls_bpf.exts_integrated = prog->exts_integrated; 166 cls_bpf.name = obj->bpf_name;
165 cls_bpf.gen_flags = prog->gen_flags; 167 cls_bpf.exts_integrated = obj->exts_integrated;
168 cls_bpf.gen_flags = obj->gen_flags;
166 169
167 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); 170 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
168 if (addorrep) { 171 if (prog) {
169 if (err < 0) { 172 if (err < 0) {
170 cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); 173 cls_bpf_offload_cmd(tp, oldprog, prog);
171 return err; 174 return err;
172 } else if (err > 0) { 175 } else if (err > 0) {
173 prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; 176 prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
174 } 177 }
175 } 178 }
176 179
177 if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) 180 if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
178 return -EINVAL; 181 return -EINVAL;
179 182
180 return 0; 183 return 0;
@@ -183,38 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
183static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, 186static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
184 struct cls_bpf_prog *oldprog) 187 struct cls_bpf_prog *oldprog)
185{ 188{
186 struct cls_bpf_prog *obj = prog; 189 if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
187 enum tc_clsbpf_command cmd; 190 return -EINVAL;
188 bool skip_sw;
189 int ret;
190
191 skip_sw = tc_skip_sw(prog->gen_flags) ||
192 (oldprog && tc_skip_sw(oldprog->gen_flags));
193
194 if (oldprog && oldprog->offloaded) {
195 if (!tc_skip_hw(prog->gen_flags)) {
196 cmd = TC_CLSBPF_REPLACE;
197 } else if (!tc_skip_sw(prog->gen_flags)) {
198 obj = oldprog;
199 cmd = TC_CLSBPF_DESTROY;
200 } else {
201 return -EINVAL;
202 }
203 } else {
204 if (tc_skip_hw(prog->gen_flags))
205 return skip_sw ? -EINVAL : 0;
206 cmd = TC_CLSBPF_ADD;
207 }
208
209 ret = cls_bpf_offload_cmd(tp, obj, cmd);
210 if (ret)
211 return ret;
212 191
213 obj->offloaded = true; 192 if (prog && tc_skip_hw(prog->gen_flags))
214 if (oldprog) 193 prog = NULL;
215 oldprog->offloaded = false; 194 if (oldprog && tc_skip_hw(oldprog->gen_flags))
195 oldprog = NULL;
196 if (!prog && !oldprog)
197 return 0;
216 198
217 return 0; 199 return cls_bpf_offload_cmd(tp, prog, oldprog);
218} 200}
219 201
220static void cls_bpf_stop_offload(struct tcf_proto *tp, 202static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -222,25 +204,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
222{ 204{
223 int err; 205 int err;
224 206
225 if (!prog->offloaded) 207 err = cls_bpf_offload_cmd(tp, NULL, prog);
226 return; 208 if (err)
227
228 err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
229 if (err) {
230 pr_err("Stopping hardware offload failed: %d\n", err); 209 pr_err("Stopping hardware offload failed: %d\n", err);
231 return;
232 }
233
234 prog->offloaded = false;
235} 210}
236 211
237static void cls_bpf_offload_update_stats(struct tcf_proto *tp, 212static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
238 struct cls_bpf_prog *prog) 213 struct cls_bpf_prog *prog)
239{ 214{
240 if (!prog->offloaded) 215 struct tcf_block *block = tp->chain->block;
241 return; 216 struct tc_cls_bpf_offload cls_bpf = {};
217
218 tc_cls_common_offload_init(&cls_bpf.common, tp);
219 cls_bpf.command = TC_CLSBPF_STATS;
220 cls_bpf.exts = &prog->exts;
221 cls_bpf.prog = prog->filter;
222 cls_bpf.name = prog->bpf_name;
223 cls_bpf.exts_integrated = prog->exts_integrated;
224 cls_bpf.gen_flags = prog->gen_flags;
242 225
243 cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS); 226 tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
244} 227}
245 228
246static int cls_bpf_init(struct tcf_proto *tp) 229static int cls_bpf_init(struct tcf_proto *tp)
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 3f619fdcbf0a..291c97b07058 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid)
78 case SCTP_CID_AUTH: 78 case SCTP_CID_AUTH:
79 return "AUTH"; 79 return "AUTH";
80 80
81 case SCTP_CID_RECONF:
82 return "RECONF";
83
81 default: 84 default:
82 break; 85 break;
83 } 86 }
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 97fae53310e0..0b427100b0d4 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1093,29 +1093,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
1093void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, 1093void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
1094 gfp_t gfp) 1094 gfp_t gfp)
1095{ 1095{
1096 struct sctp_association *asoc; 1096 struct sctp_association *asoc = ulpq->asoc;
1097 __u16 needed, freed; 1097 __u32 freed = 0;
1098 1098 __u16 needed;
1099 asoc = ulpq->asoc;
1100 1099
1101 if (chunk) { 1100 needed = ntohs(chunk->chunk_hdr->length) -
1102 needed = ntohs(chunk->chunk_hdr->length); 1101 sizeof(struct sctp_data_chunk);
1103 needed -= sizeof(struct sctp_data_chunk);
1104 } else
1105 needed = SCTP_DEFAULT_MAXWINDOW;
1106
1107 freed = 0;
1108 1102
1109 if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) { 1103 if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
1110 freed = sctp_ulpq_renege_order(ulpq, needed); 1104 freed = sctp_ulpq_renege_order(ulpq, needed);
1111 if (freed < needed) { 1105 if (freed < needed)
1112 freed += sctp_ulpq_renege_frags(ulpq, needed - freed); 1106 freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
1113 }
1114 } 1107 }
1115 /* If able to free enough room, accept this chunk. */ 1108 /* If able to free enough room, accept this chunk. */
1116 if (chunk && (freed >= needed)) { 1109 if (freed >= needed) {
1117 int retval; 1110 int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
1118 retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
1119 /* 1111 /*
1120 * Enter partial delivery if chunk has not been 1112 * Enter partial delivery if chunk has not been
1121 * delivered; otherwise, drain the reassembly queue. 1113 * delivered; otherwise, drain the reassembly queue.
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 95fec2c057d6..7ebbdeb2a90e 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -351,8 +351,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
351 if (m->window >= ADV_IDLE) 351 if (m->window >= ADV_IDLE)
352 return; 352 return;
353 353
354 if (!list_empty(&m->congested)) 354 list_del_init(&m->congested);
355 return;
356 355
357 /* Sort member into congested members' list */ 356 /* Sort member into congested members' list */
358 list_for_each_entry_safe(_m, tmp, &grp->congested, congested) { 357 list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
@@ -648,6 +647,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
648 } else if (mtyp == GRP_REMIT_MSG) { 647 } else if (mtyp == GRP_REMIT_MSG) {
649 msg_set_grp_remitted(hdr, m->window); 648 msg_set_grp_remitted(hdr, m->window);
650 } 649 }
650 msg_set_dest_droppable(hdr, true);
651 __skb_queue_tail(xmitq, skb); 651 __skb_queue_tail(xmitq, skb);
652} 652}
653 653
@@ -689,15 +689,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
689 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); 689 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
690 __skb_queue_tail(inputq, m->event_msg); 690 __skb_queue_tail(inputq, m->event_msg);
691 } 691 }
692 if (m->window < ADV_IDLE) 692 list_del_init(&m->congested);
693 tipc_group_update_member(m, 0); 693 tipc_group_update_member(m, 0);
694 else
695 list_del_init(&m->congested);
696 return; 694 return;
697 case GRP_LEAVE_MSG: 695 case GRP_LEAVE_MSG:
698 if (!m) 696 if (!m)
699 return; 697 return;
700 m->bc_syncpt = msg_grp_bc_syncpt(hdr); 698 m->bc_syncpt = msg_grp_bc_syncpt(hdr);
699 list_del_init(&m->list);
700 list_del_init(&m->congested);
701 *usr_wakeup = true;
701 702
702 /* Wait until WITHDRAW event is received */ 703 /* Wait until WITHDRAW event is received */
703 if (m->state != MBR_LEAVING) { 704 if (m->state != MBR_LEAVING) {
@@ -709,8 +710,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
709 ehdr = buf_msg(m->event_msg); 710 ehdr = buf_msg(m->event_msg);
710 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); 711 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
711 __skb_queue_tail(inputq, m->event_msg); 712 __skb_queue_tail(inputq, m->event_msg);
712 *usr_wakeup = true;
713 list_del_init(&m->congested);
714 return; 713 return;
715 case GRP_ADV_MSG: 714 case GRP_ADV_MSG:
716 if (!m) 715 if (!m)
@@ -862,6 +861,7 @@ void tipc_group_member_evt(struct tipc_group *grp,
862 msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); 861 msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
863 __skb_queue_tail(inputq, skb); 862 __skb_queue_tail(inputq, skb);
864 } 863 }
864 list_del_init(&m->list);
865 list_del_init(&m->congested); 865 list_del_init(&m->congested);
866 } 866 }
867 *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); 867 *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index d7d6cb00c47b..1d84f91bbfb0 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -23,27 +23,14 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
23cfg80211-y += extra-certs.o 23cfg80211-y += extra-certs.o
24endif 24endif
25 25
26$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) 26$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
27 @$(kecho) " GEN $@" 27 @$(kecho) " GEN $@"
28 @(set -e; \ 28 @(echo '#include "reg.h"'; \
29 allf=""; \ 29 echo 'const u8 shipped_regdb_certs[] = {'; \
30 for f in $^ ; do \ 30 cat $^ ; \
31 # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \ 31 echo '};'; \
32 thisf=$$(od -An -v -tx1 < $$f | \ 32 echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
33 sed -e 's/ /\n/g' | \ 33 ) > $@
34 sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
35 sed -e 's/^/0x/;s/$$/,/'); \
36 # file should not be empty - maybe command substitution failed? \
37 test ! -z "$$thisf";\
38 allf=$$allf$$thisf;\
39 done; \
40 ( \
41 echo '#include "reg.h"'; \
42 echo 'const u8 shipped_regdb_certs[] = {'; \
43 echo "$$allf"; \
44 echo '};'; \
45 echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
46 ) >> $@)
47 34
48$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ 35$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
49 $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) 36 $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
@@ -66,4 +53,6 @@ $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
66 echo "$$allf"; \ 53 echo "$$allf"; \
67 echo '};'; \ 54 echo '};'; \
68 echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \ 55 echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
69 ) >> $@) 56 ) > $@)
57
58clean-files += shipped-certs.c extra-certs.c
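
With the recipe above, the generated shipped-certs.c is simply the certs/*.hex files wrapped in a C array, roughly (abbreviated; the bytes come from the .hex file added below):

        #include "reg.h"
        const u8 shipped_regdb_certs[] = {
        /* Seth Forshee's regdb certificate */
        0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
        /* ... remaining lines of certs/sforshee.hex ... */
        };
        unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);

Switching the final redirection from '>>' to '>' also means a rebuild overwrites the generated file instead of appending to a stale copy.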
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex
new file mode 100644
index 000000000000..14ea66643ffa
--- /dev/null
+++ b/net/wireless/certs/sforshee.hex
@@ -0,0 +1,86 @@
1/* Seth Forshee's regdb certificate */
20x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
30x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae,
40xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a,
50x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b,
60x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f,
70x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73,
80x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30,
90x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30,
100x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a,
110x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39,
120x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35,
130x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06,
140x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66,
150x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82,
160x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86,
170x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05,
180x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82,
190x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5,
200x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2,
210x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac,
220x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c,
230x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38,
240x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d,
250x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20,
260x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b,
270x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57,
280x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b,
290x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51,
300x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a,
310xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18,
320x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98,
330xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1,
340x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28,
350x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71,
360x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a,
370xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85,
380xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30,
390x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7,
400x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65,
410x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3,
420x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18,
430x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36,
440x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1,
450x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96,
460x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c,
470x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11,
480x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7,
490xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6,
500xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0,
510x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02,
520x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09,
530x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
540x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
550x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf,
560x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93,
570x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7,
580x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9,
590x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3,
600x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec,
610xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0,
620x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3,
630x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4,
640x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32,
650xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74,
660x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22,
670x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86,
680x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c,
690xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06,
700xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1,
710x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58,
720xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4,
730xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72,
740x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79,
750xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a,
760xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f,
770x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47,
780xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a,
790x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28,
800xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2,
810xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87,
820x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d,
830x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc,
840x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16,
850x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f,
860x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14,
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
deleted file mode 100644
index c6f8f9d6b988..000000000000
--- a/net/wireless/certs/sforshee.x509
+++ /dev/null
Binary files differ
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b1ac23ca20c8..213d0c498c97 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2610 case NL80211_IFTYPE_AP: 2610 case NL80211_IFTYPE_AP:
2611 if (wdev->ssid_len && 2611 if (wdev->ssid_len &&
2612 nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) 2612 nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
2613 goto nla_put_failure; 2613 goto nla_put_failure_locked;
2614 break; 2614 break;
2615 case NL80211_IFTYPE_STATION: 2615 case NL80211_IFTYPE_STATION:
2616 case NL80211_IFTYPE_P2P_CLIENT: 2616 case NL80211_IFTYPE_P2P_CLIENT:
@@ -2623,7 +2623,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2623 if (!ssid_ie) 2623 if (!ssid_ie)
2624 break; 2624 break;
2625 if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2)) 2625 if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
2626 goto nla_put_failure; 2626 goto nla_put_failure_locked;
2627 break; 2627 break;
2628 } 2628 }
2629 default: 2629 default:
@@ -2635,6 +2635,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2635 genlmsg_end(msg, hdr); 2635 genlmsg_end(msg, hdr);
2636 return 0; 2636 return 0;
2637 2637
2638 nla_put_failure_locked:
2639 wdev_unlock(wdev);
2638 nla_put_failure: 2640 nla_put_failure:
2639 genlmsg_cancel(msg, hdr); 2641 genlmsg_cancel(msg, hdr);
2640 return -EMSGSIZE; 2642 return -EMSGSIZE;
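
Note on the nl80211 hunks above: the failing nla_put() calls are redirected to a new nla_put_failure_locked label because, after this merge, they run while wdev_lock() is held; jumping to the old label would have returned with the lock still taken. A minimal userspace sketch of the same two-label error-path shape (pthread analogue, illustrative names, not the nl80211 code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* fail_early: error before the lock is taken; fail_while_locked: error
 * while holding it.  Both paths end in the same cleanup, but only the
 * second releases the lock first, then falls through. */
static int build_message(int fail_early, int fail_while_locked)
{
	if (fail_early)
		goto fail;

	pthread_mutex_lock(&lock);
	if (fail_while_locked)
		goto fail_locked;
	pthread_mutex_unlock(&lock);
	return 0;

fail_locked:
	pthread_mutex_unlock(&lock);
fail:
	fprintf(stderr, "cancel partially built message\n");
	return -1;
}

int main(void)
{
	printf("%d\n", build_message(0, 1));
	return 0;
}
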
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index b3b353d72527..f055ca10bbc1 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream,
579 return 0; 579 return 0;
580} 580}
581 581
582int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) 582static int __snd_rawmidi_info_select(struct snd_card *card,
583 struct snd_rawmidi_info *info)
583{ 584{
584 struct snd_rawmidi *rmidi; 585 struct snd_rawmidi *rmidi;
585 struct snd_rawmidi_str *pstr; 586 struct snd_rawmidi_str *pstr;
586 struct snd_rawmidi_substream *substream; 587 struct snd_rawmidi_substream *substream;
587 588
588 mutex_lock(&register_mutex);
589 rmidi = snd_rawmidi_search(card, info->device); 589 rmidi = snd_rawmidi_search(card, info->device);
590 mutex_unlock(&register_mutex);
591 if (!rmidi) 590 if (!rmidi)
592 return -ENXIO; 591 return -ENXIO;
593 if (info->stream < 0 || info->stream > 1) 592 if (info->stream < 0 || info->stream > 1)
@@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info
603 } 602 }
604 return -ENXIO; 603 return -ENXIO;
605} 604}
605
606int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
607{
608 int ret;
609
610 mutex_lock(&register_mutex);
611 ret = __snd_rawmidi_info_select(card, info);
612 mutex_unlock(&register_mutex);
613 return ret;
614}
606EXPORT_SYMBOL(snd_rawmidi_info_select); 615EXPORT_SYMBOL(snd_rawmidi_info_select);
607 616
608static int snd_rawmidi_info_select_user(struct snd_card *card, 617static int snd_rawmidi_info_select_user(struct snd_card *card,
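
The rawmidi change above closes a window where register_mutex was dropped right after snd_rawmidi_search(), so the object that was found could in principle disappear while still being used. The fix is the common split into a double-underscore helper that assumes the lock and a thin exported wrapper that takes it around the whole operation. A compile-able userspace sketch of that shape (illustrative names, not the ALSA code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

struct device { int id; };
static struct device devices[] = { { 10 }, { 11 } };

/* Caller must hold registry_lock. */
static struct device *lookup(int id)
{
	for (unsigned int i = 0; i < sizeof(devices) / sizeof(devices[0]); i++)
		if (devices[i].id == id)
			return &devices[i];
	return NULL;
}

/* Caller must hold registry_lock; lookup *and* use stay protected. */
static int __query_device(int id, int *out)
{
	struct device *dev = lookup(id);

	if (!dev)
		return -1;
	*out = dev->id;
	return 0;
}

static int query_device(int id, int *out)
{
	int ret;

	pthread_mutex_lock(&registry_lock);
	ret = __query_device(id, out);
	pthread_mutex_unlock(&registry_lock);
	return ret;
}

int main(void)
{
	int v = 0;

	printf("%d %d\n", query_device(11, &v), v);
	return 0;
}
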
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index c19c81d230bd..b4f1b6e88305 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -55,10 +55,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
55#define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b) 55#define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b)
56#define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \ 56#define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \
57 ((codec)->core.vendor_id == 0x80862800)) 57 ((codec)->core.vendor_id == 0x80862800))
58#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
58#define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \ 59#define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
59 || is_skylake(codec) || is_broxton(codec) \ 60 || is_skylake(codec) || is_broxton(codec) \
60 || is_kabylake(codec)) || is_geminilake(codec) 61 || is_kabylake(codec)) || is_geminilake(codec) \
61 62 || is_cannonlake(codec)
62#define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882) 63#define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
63#define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883) 64#define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
64#define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec)) 65#define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
@@ -3841,6 +3842,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI", patch_i915_hsw_hdmi),
3841HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI", patch_i915_hsw_hdmi), 3842HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI", patch_i915_hsw_hdmi),
3842HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI", patch_i915_hsw_hdmi), 3843HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI", patch_i915_hsw_hdmi),
3843HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI", patch_i915_hsw_hdmi), 3844HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI", patch_i915_hsw_hdmi),
3845HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi),
3844HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi), 3846HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi),
3845HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi), 3847HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi),
3846HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi), 3848HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
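
For reference, the Cannonlake hunk follows the usual two-step pattern for a new Intel HDMI codec: a vendor-ID predicate (is_cannonlake) folded into is_haswell_plus(), plus an HDA_CODEC_ENTRY mapping the new ID onto an existing patch routine. A standalone sketch of the ID-to-handler table idea (not the driver code; the IDs and names are taken from the hunk above):

#include <stddef.h>
#include <stdio.h>

struct codec_entry { unsigned int vendor_id; const char *name; };

#define IS_CANNONLAKE(id) ((id) == 0x8086280c)

static const struct codec_entry entries[] = {
	{ 0x8086280b, "Kabylake HDMI" },
	{ 0x8086280c, "Cannonlake HDMI" },	/* new in this merge */
	{ 0x8086280d, "Geminilake HDMI" },
};

int main(void)
{
	for (size_t i = 0; i < sizeof(entries) / sizeof(entries[0]); i++)
		printf("%#x %s%s\n", entries[i].vendor_id, entries[i].name,
		       IS_CANNONLAKE(entries[i].vendor_id) ? " (cnl)" : "");
	return 0;
}
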
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 4b21f71d685c..6a4db00511ab 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5185,6 +5185,22 @@ static void alc233_alc662_fixup_lenovo_dual_codecs(struct hda_codec *codec,
5185 } 5185 }
5186} 5186}
5187 5187
5188/* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */
5189static void alc274_fixup_bind_dacs(struct hda_codec *codec,
5190 const struct hda_fixup *fix, int action)
5191{
5192 struct alc_spec *spec = codec->spec;
5193 static hda_nid_t preferred_pairs[] = {
5194 0x21, 0x03, 0x1b, 0x03, 0x16, 0x02,
5195 0
5196 };
5197
5198 if (action != HDA_FIXUP_ACT_PRE_PROBE)
5199 return;
5200
5201 spec->gen.preferred_dacs = preferred_pairs;
5202}
5203
5188/* for hda_fixup_thinkpad_acpi() */ 5204/* for hda_fixup_thinkpad_acpi() */
5189#include "thinkpad_helper.c" 5205#include "thinkpad_helper.c"
5190 5206
@@ -5302,6 +5318,8 @@ enum {
5302 ALC233_FIXUP_LENOVO_MULTI_CODECS, 5318 ALC233_FIXUP_LENOVO_MULTI_CODECS,
5303 ALC294_FIXUP_LENOVO_MIC_LOCATION, 5319 ALC294_FIXUP_LENOVO_MIC_LOCATION,
5304 ALC700_FIXUP_INTEL_REFERENCE, 5320 ALC700_FIXUP_INTEL_REFERENCE,
5321 ALC274_FIXUP_DELL_BIND_DACS,
5322 ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
5305}; 5323};
5306 5324
5307static const struct hda_fixup alc269_fixups[] = { 5325static const struct hda_fixup alc269_fixups[] = {
@@ -6112,6 +6130,21 @@ static const struct hda_fixup alc269_fixups[] = {
6112 {} 6130 {}
6113 } 6131 }
6114 }, 6132 },
6133 [ALC274_FIXUP_DELL_BIND_DACS] = {
6134 .type = HDA_FIXUP_FUNC,
6135 .v.func = alc274_fixup_bind_dacs,
6136 .chained = true,
6137 .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
6138 },
6139 [ALC274_FIXUP_DELL_AIO_LINEOUT_VERB] = {
6140 .type = HDA_FIXUP_PINS,
6141 .v.pins = (const struct hda_pintbl[]) {
6142 { 0x1b, 0x0401102f },
6143 { }
6144 },
6145 .chained = true,
6146 .chain_id = ALC274_FIXUP_DELL_BIND_DACS
6147 },
6115}; 6148};
6116 6149
6117static const struct snd_pci_quirk alc269_fixup_tbl[] = { 6150static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6578,7 +6611,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
6578 {0x14, 0x90170110}, 6611 {0x14, 0x90170110},
6579 {0x1b, 0x90a70130}, 6612 {0x1b, 0x90a70130},
6580 {0x21, 0x03211020}), 6613 {0x21, 0x03211020}),
6581 SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, 6614 SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
6582 {0x12, 0xb7a60130}, 6615 {0x12, 0xb7a60130},
6583 {0x13, 0xb8a61140}, 6616 {0x13, 0xb8a61140},
6584 {0x16, 0x90170110}, 6617 {0x16, 0x90170110},
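
The ALC274 fixup above works by handing the generic parser a preferred_dacs table: a flat, zero-terminated list of (pin NID, DAC NID) pairs that, per the driver comment, forces DAC 0x03 onto the headphone/line-out pins and 0x02 onto the speaker; the new pin quirk then chains onto it through .chained/.chain_id. A small sketch of how such a pair list is walked (assumed layout, not the HDA parser itself):

#include <stdio.h>

typedef unsigned short hda_nid_t;

/* (pin, dac) pairs, zero-terminated, mirroring the fixup above:
 * 0x21/0x1b (HP / line out) -> DAC 0x03, 0x16 (speaker) -> DAC 0x02. */
static const hda_nid_t preferred_pairs[] = {
	0x21, 0x03,
	0x1b, 0x03,
	0x16, 0x02,
	0
};

int main(void)
{
	for (int i = 0; preferred_pairs[i]; i += 2)
		printf("pin 0x%02x -> dac 0x%02x\n",
		       preferred_pairs[i], preferred_pairs[i + 1]);
	return 0;
}
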
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 7c9e361b2200..2b4ceda36291 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -2173,20 +2173,25 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid,
2173 kctl->private_value = (unsigned long)namelist; 2173 kctl->private_value = (unsigned long)namelist;
2174 kctl->private_free = usb_mixer_selector_elem_free; 2174 kctl->private_free = usb_mixer_selector_elem_free;
2175 2175
2176 nameid = uac_selector_unit_iSelector(desc); 2176 /* check the static mapping table at first */
2177 len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name)); 2177 len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
2178 if (len)
2179 ;
2180 else if (nameid)
2181 len = snd_usb_copy_string_desc(state, nameid, kctl->id.name,
2182 sizeof(kctl->id.name));
2183 else
2184 len = get_term_name(state, &state->oterm,
2185 kctl->id.name, sizeof(kctl->id.name), 0);
2186
2187 if (!len) { 2178 if (!len) {
2188 strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name)); 2179 /* no mapping ? */
2180 /* if iSelector is given, use it */
2181 nameid = uac_selector_unit_iSelector(desc);
2182 if (nameid)
2183 len = snd_usb_copy_string_desc(state, nameid,
2184 kctl->id.name,
2185 sizeof(kctl->id.name));
2186 /* ... or pick up the terminal name at next */
2187 if (!len)
2188 len = get_term_name(state, &state->oterm,
2189 kctl->id.name, sizeof(kctl->id.name), 0);
2190 /* ... or use the fixed string "USB" as the last resort */
2191 if (!len)
2192 strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
2189 2193
2194 /* and add the proper suffix */
2190 if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR) 2195 if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR)
2191 append_ctl_name(kctl, " Clock Source"); 2196 append_ctl_name(kctl, " Clock Source");
2192 else if ((state->oterm.type & 0xff00) == 0x0100) 2197 else if ((state->oterm.type & 0xff00) == 0x0100)
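
The mixer.c hunk makes the naming order for a selector control explicit: the static mapping table wins, then the unit's iSelector string descriptor, then the output terminal name, and only then the fixed "USB" fallback, after which the suffix (for example " Clock Source") is appended. A compile-able sketch of that fallback chain (stub lookups, not the ALSA implementation):

#include <stdio.h>

/* Stubs standing in for the map, the iSelector string and the terminal
 * name; each returns the number of characters written, 0 if it has
 * nothing to offer. */
static int from_map(char *buf, size_t len)       { (void)buf; (void)len; return 0; }
static int from_iselector(char *buf, size_t len) { (void)buf; (void)len; return 0; }
static int from_terminal(char *buf, size_t len)  { return snprintf(buf, len, "Speaker"); }

static void pick_name(char *buf, size_t len)
{
	int n = from_map(buf, len);		/* 1. static mapping table */

	if (!n)
		n = from_iselector(buf, len);	/* 2. iSelector string descriptor */
	if (!n)
		n = from_terminal(buf, len);	/* 3. output terminal name */
	if (!n)
		snprintf(buf, len, "USB");	/* 4. last-resort fixed name */
}

int main(void)
{
	char name[44];

	pick_name(name, sizeof(name));
	printf("%s\n", name);
	return 0;
}
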
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 77eecaa4db1f..a66ef5777887 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1166,10 +1166,11 @@ static bool is_marantz_denon_dac(unsigned int id)
1166/* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch 1166/* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch
1167 * between PCM/DOP and native DSD mode 1167 * between PCM/DOP and native DSD mode
1168 */ 1168 */
1169static bool is_teac_50X_dac(unsigned int id) 1169static bool is_teac_dsd_dac(unsigned int id)
1170{ 1170{
1171 switch (id) { 1171 switch (id) {
1172 case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */ 1172 case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
1173 case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */
1173 return true; 1174 return true;
1174 } 1175 }
1175 return false; 1176 return false;
@@ -1202,7 +1203,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs,
1202 break; 1203 break;
1203 } 1204 }
1204 mdelay(20); 1205 mdelay(20);
1205 } else if (is_teac_50X_dac(subs->stream->chip->usb_id)) { 1206 } else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) {
1206 /* Vendor mode switch cmd is required. */ 1207 /* Vendor mode switch cmd is required. */
1207 switch (fmt->altsetting) { 1208 switch (fmt->altsetting) {
1208 case 3: /* DSD mode (DSD_U32) requested */ 1209 case 3: /* DSD mode (DSD_U32) requested */
@@ -1392,7 +1393,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
1392 } 1393 }
1393 1394
1394 /* TEAC devices with USB DAC functionality */ 1395 /* TEAC devices with USB DAC functionality */
1395 if (is_teac_50X_dac(chip->usb_id)) { 1396 if (is_teac_dsd_dac(chip->usb_id)) {
1396 if (fp->altsetting == 3) 1397 if (fp->altsetting == 3)
1397 return SNDRV_PCM_FMTBIT_DSD_U32_BE; 1398 return SNDRV_PCM_FMTBIT_DSD_U32_BE;
1398 } 1399 }
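
The quirks.c rename reflects that the TEAC vendor-command DSD switch now also covers the Esoteric D-05X, matched purely by its combined USB ID. A sketch of that matching (assumption: USB_ID packs vendor and product as vendor << 16 | product, as in ALSA's usbaudio.h; the IDs below come from the hunk above):

#include <stdbool.h>
#include <stdio.h>

#define USB_ID(vendor, product) (((unsigned int)(vendor) << 16) | (product))

static bool is_teac_dsd_dac(unsigned int id)
{
	switch (id) {
	case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
	case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */
		return true;
	}
	return false;
}

int main(void)
{
	printf("%d\n", is_teac_dsd_dac(USB_ID(0x0644, 0x8044)));
	return 0;
}
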
diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
index cefe7c7cd4f6..0a8e37a519f2 100644
--- a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
+++ b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
@@ -2,7 +2,7 @@
2#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ 2#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
3#define _UAPI__ASM_BPF_PERF_EVENT_H__ 3#define _UAPI__ASM_BPF_PERF_EVENT_H__
4 4
5#include <asm/ptrace.h> 5#include "ptrace.h"
6 6
7typedef user_pt_regs bpf_user_pt_regs_t; 7typedef user_pt_regs bpf_user_pt_regs_t;
8 8
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 217cf6f95c36..a5684d0968b4 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -478,7 +478,7 @@ class Provider(object):
478 @staticmethod 478 @staticmethod
479 def is_field_wanted(fields_filter, field): 479 def is_field_wanted(fields_filter, field):
480 """Indicate whether field is valid according to fields_filter.""" 480 """Indicate whether field is valid according to fields_filter."""
481 if not fields_filter or fields_filter == "help": 481 if not fields_filter:
482 return True 482 return True
483 return re.match(fields_filter, field) is not None 483 return re.match(fields_filter, field) is not None
484 484
@@ -549,8 +549,8 @@ class TracepointProvider(Provider):
549 549
550 def update_fields(self, fields_filter): 550 def update_fields(self, fields_filter):
551 """Refresh fields, applying fields_filter""" 551 """Refresh fields, applying fields_filter"""
552 self._fields = [field for field in self.get_available_fields() 552 self.fields = [field for field in self.get_available_fields()
553 if self.is_field_wanted(fields_filter, field)] 553 if self.is_field_wanted(fields_filter, field)]
554 554
555 @staticmethod 555 @staticmethod
556 def get_online_cpus(): 556 def get_online_cpus():
@@ -950,7 +950,8 @@ class Tui(object):
950 curses.nocbreak() 950 curses.nocbreak()
951 curses.endwin() 951 curses.endwin()
952 952
953 def get_all_gnames(self): 953 @staticmethod
954 def get_all_gnames():
954 """Returns a list of (pid, gname) tuples of all running guests""" 955 """Returns a list of (pid, gname) tuples of all running guests"""
955 res = [] 956 res = []
956 try: 957 try:
@@ -963,7 +964,7 @@ class Tui(object):
963 # perform a sanity check before calling the more expensive 964 # perform a sanity check before calling the more expensive
964 # function to possibly extract the guest name 965 # function to possibly extract the guest name
965 if ' -name ' in line[1]: 966 if ' -name ' in line[1]:
966 res.append((line[0], self.get_gname_from_pid(line[0]))) 967 res.append((line[0], Tui.get_gname_from_pid(line[0])))
967 child.stdout.close() 968 child.stdout.close()
968 969
969 return res 970 return res
@@ -984,7 +985,8 @@ class Tui(object):
984 except Exception: 985 except Exception:
985 self.screen.addstr(row + 1, 2, 'Not available') 986 self.screen.addstr(row + 1, 2, 'Not available')
986 987
987 def get_pid_from_gname(self, gname): 988 @staticmethod
989 def get_pid_from_gname(gname):
988 """Fuzzy function to convert guest name to QEMU process pid. 990 """Fuzzy function to convert guest name to QEMU process pid.
989 991
990 Returns a list of potential pids, can be empty if no match found. 992 Returns a list of potential pids, can be empty if no match found.
@@ -992,7 +994,7 @@ class Tui(object):
992 994
993 """ 995 """
994 pids = [] 996 pids = []
995 for line in self.get_all_gnames(): 997 for line in Tui.get_all_gnames():
996 if gname == line[1]: 998 if gname == line[1]:
997 pids.append(int(line[0])) 999 pids.append(int(line[0]))
998 1000
@@ -1090,15 +1092,16 @@ class Tui(object):
1090 # sort by totals 1092 # sort by totals
1091 return (0, -stats[x][0]) 1093 return (0, -stats[x][0])
1092 total = 0. 1094 total = 0.
1093 for val in stats.values(): 1095 for key in stats.keys():
1094 total += val[0] 1096 if key.find('(') is -1:
1097 total += stats[key][0]
1095 if self._sorting == SORT_DEFAULT: 1098 if self._sorting == SORT_DEFAULT:
1096 sortkey = sortCurAvg 1099 sortkey = sortCurAvg
1097 else: 1100 else:
1098 sortkey = sortTotal 1101 sortkey = sortTotal
1102 tavg = 0
1099 for key in sorted(stats.keys(), key=sortkey): 1103 for key in sorted(stats.keys(), key=sortkey):
1100 1104 if row >= self.screen.getmaxyx()[0] - 1:
1101 if row >= self.screen.getmaxyx()[0]:
1102 break 1105 break
1103 values = stats[key] 1106 values = stats[key]
1104 if not values[0] and not values[1]: 1107 if not values[0] and not values[1]:
@@ -1110,9 +1113,15 @@ class Tui(object):
1110 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % 1113 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
1111 (key, values[0], values[0] * 100 / total, 1114 (key, values[0], values[0] * 100 / total,
1112 cur)) 1115 cur))
1116 if cur is not '' and key.find('(') is -1:
1117 tavg += cur
1113 row += 1 1118 row += 1
1114 if row == 3: 1119 if row == 3:
1115 self.screen.addstr(4, 1, 'No matching events reported yet') 1120 self.screen.addstr(4, 1, 'No matching events reported yet')
1121 else:
1122 self.screen.addstr(row, 1, '%-40s %10d %8s' %
1123 ('Total', total, tavg if tavg else ''),
1124 curses.A_BOLD)
1116 self.screen.refresh() 1125 self.screen.refresh()
1117 1126
1118 def show_msg(self, text): 1127 def show_msg(self, text):
@@ -1358,7 +1367,7 @@ class Tui(object):
1358 if char == 'x': 1367 if char == 'x':
1359 self.update_drilldown() 1368 self.update_drilldown()
1360 # prevents display of current values on next refresh 1369 # prevents display of current values on next refresh
1361 self.stats.get() 1370 self.stats.get(self._display_guests)
1362 except KeyboardInterrupt: 1371 except KeyboardInterrupt:
1363 break 1372 break
1364 except curses.error: 1373 except curses.error:
@@ -1451,16 +1460,13 @@ Press any other key to refresh statistics immediately.
1451 try: 1460 try:
1452 pids = Tui.get_pid_from_gname(val) 1461 pids = Tui.get_pid_from_gname(val)
1453 except: 1462 except:
1454 raise optparse.OptionValueError('Error while searching for guest ' 1463 sys.exit('Error while searching for guest "{}". Use "-p" to '
1455 '"{}", use "-p" to specify a pid ' 1464 'specify a pid instead?'.format(val))
1456 'instead'.format(val))
1457 if len(pids) == 0: 1465 if len(pids) == 0:
1458 raise optparse.OptionValueError('No guest by the name "{}" ' 1466 sys.exit('Error: No guest by the name "{}" found'.format(val))
1459 'found'.format(val))
1460 if len(pids) > 1: 1467 if len(pids) > 1:
1461 raise optparse.OptionValueError('Multiple processes found (pids: ' 1468 sys.exit('Error: Multiple processes found (pids: {}). Use "-p" '
1462 '{}) - use "-p" to specify a pid ' 1469 'to specify the desired pid'.format(" ".join(pids)))
1463 'instead'.format(" ".join(pids)))
1464 parser.values.pid = pids[0] 1470 parser.values.pid = pids[0]
1465 1471
1466 optparser = optparse.OptionParser(description=description_text, 1472 optparser = optparse.OptionParser(description=description_text,
@@ -1518,7 +1524,16 @@ Press any other key to refresh statistics immediately.
1518 help='restrict statistics to guest by name', 1524 help='restrict statistics to guest by name',
1519 callback=cb_guest_to_pid, 1525 callback=cb_guest_to_pid,
1520 ) 1526 )
1521 (options, _) = optparser.parse_args(sys.argv) 1527 options, unkn = optparser.parse_args(sys.argv)
1528 if len(unkn) != 1:
1529 sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:]))
1530 try:
1531 # verify that we were passed a valid regex up front
1532 re.compile(options.fields)
1533 except re.error:
1534 sys.exit('Error: "' + options.fields + '" is not a valid regular '
1535 'expression')
1536
1522 return options 1537 return options
1523 1538
1524 1539
@@ -1564,16 +1579,13 @@ def main():
1564 1579
1565 stats = Stats(options) 1580 stats = Stats(options)
1566 1581
1567 if options.fields == "help": 1582 if options.fields == 'help':
1568 event_list = "\n" 1583 stats.fields_filter = None
1569 s = stats.get() 1584 event_list = []
1570 for key in s.keys(): 1585 for key in stats.get().keys():
1571 if key.find('(') != -1: 1586 event_list.append(key.split('(', 1)[0])
1572 key = key[0:key.find('(')] 1587 sys.stdout.write(' ' + '\n '.join(sorted(set(event_list))) + '\n')
1573 if event_list.find('\n' + key + '\n') == -1: 1588 sys.exit(0)
1574 event_list += key + '\n'
1575 sys.stdout.write(event_list)
1576 return ""
1577 1589
1578 if options.log: 1590 if options.log:
1579 log(stats) 1591 log(stats)
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index e5cf836be8a1..b5b3810c9e94 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -50,6 +50,8 @@ INTERACTIVE COMMANDS
50*s*:: set update interval 50*s*:: set update interval
51 51
52*x*:: toggle reporting of stats for child trace events 52*x*:: toggle reporting of stats for child trace events
53 :: *Note*: The stats for the parents summarize the respective child trace
54 events
53 55
54Press any other key to refresh statistics immediately. 56Press any other key to refresh statistics immediately.
55 57
@@ -86,7 +88,7 @@ OPTIONS
86 88
87-f<fields>:: 89-f<fields>::
88--fields=<fields>:: 90--fields=<fields>::
89 fields to display (regex) 91 fields to display (regex), "-f help" for a list of available events
90 92
91-h:: 93-h::
92--help:: 94--help::
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index a1fcb0c31d02..f1fdb36269f2 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -11,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),)
11endif 11endif
12 12
13CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include 13CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
14LDLIBS += -lcap -lelf 14LDLIBS += -lcap -lelf -lrt
15 15
16TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ 16TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
17 test_align test_verifier_log test_dev_cgroup 17 test_align test_verifier_log test_dev_cgroup
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 3914f7a4585a..c940505c2978 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -647,8 +647,8 @@ try:
647 647
648 start_test("Test asking for TC offload of two filters...") 648 start_test("Test asking for TC offload of two filters...")
649 sim.cls_bpf_add_filter(obj, da=True, skip_sw=True) 649 sim.cls_bpf_add_filter(obj, da=True, skip_sw=True)
650 sim.cls_bpf_add_filter(obj, da=True, skip_sw=True) 650 ret, _ = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True, fail=False)
651 # The above will trigger a splat until TC cls_bpf drivers are fixed 651 fail(ret == 0, "Managed to offload two TC filters at the same time")
652 652
653 sim.tc_flush_filters(bound=2, total=2) 653 sim.tc_flush_filters(bound=2, total=2)
654 654
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 6472ca98690e..09087ab12293 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -441,7 +441,7 @@ static void test_bpf_obj_id(void)
441 info_len != sizeof(struct bpf_map_info) || 441 info_len != sizeof(struct bpf_map_info) ||
442 strcmp((char *)map_infos[i].name, expected_map_name), 442 strcmp((char *)map_infos[i].name, expected_map_name),
443 "get-map-info(fd)", 443 "get-map-info(fd)",
444 "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", 444 "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
445 err, errno, 445 err, errno,
446 map_infos[i].type, BPF_MAP_TYPE_ARRAY, 446 map_infos[i].type, BPF_MAP_TYPE_ARRAY,
447 info_len, sizeof(struct bpf_map_info), 447 info_len, sizeof(struct bpf_map_info),
@@ -485,7 +485,7 @@ static void test_bpf_obj_id(void)
485 *(int *)prog_infos[i].map_ids != map_infos[i].id || 485 *(int *)prog_infos[i].map_ids != map_infos[i].id ||
486 strcmp((char *)prog_infos[i].name, expected_prog_name), 486 strcmp((char *)prog_infos[i].name, expected_prog_name),
487 "get-prog-info(fd)", 487 "get-prog-info(fd)",
488 "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", 488 "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
489 err, errno, i, 489 err, errno, i,
490 prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, 490 prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
491 info_len, sizeof(struct bpf_prog_info), 491 info_len, sizeof(struct bpf_prog_info),
@@ -553,7 +553,7 @@ static void test_bpf_obj_id(void)
553 memcmp(&prog_info, &prog_infos[i], info_len) || 553 memcmp(&prog_info, &prog_infos[i], info_len) ||
554 *(int *)prog_info.map_ids != saved_map_id, 554 *(int *)prog_info.map_ids != saved_map_id,
555 "get-prog-info(next_id->fd)", 555 "get-prog-info(next_id->fd)",
556 "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n", 556 "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
557 err, errno, info_len, sizeof(struct bpf_prog_info), 557 err, errno, info_len, sizeof(struct bpf_prog_info),
558 memcmp(&prog_info, &prog_infos[i], info_len), 558 memcmp(&prog_info, &prog_infos[i], info_len),
559 *(int *)prog_info.map_ids, saved_map_id); 559 *(int *)prog_info.map_ids, saved_map_id);
@@ -599,7 +599,7 @@ static void test_bpf_obj_id(void)
599 memcmp(&map_info, &map_infos[i], info_len) || 599 memcmp(&map_info, &map_infos[i], info_len) ||
600 array_value != array_magic_value, 600 array_value != array_magic_value,
601 "check get-map-info(next_id->fd)", 601 "check get-map-info(next_id->fd)",
602 "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n", 602 "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
603 err, errno, info_len, sizeof(struct bpf_map_info), 603 err, errno, info_len, sizeof(struct bpf_map_info),
604 memcmp(&map_info, &map_infos[i], info_len), 604 memcmp(&map_info, &map_infos[i], info_len),
605 array_value, array_magic_value); 605 array_value, array_magic_value);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3bacff0d6f91..d38334abb990 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -423,9 +423,7 @@ static struct bpf_test tests[] = {
423 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), 423 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
424 BPF_EXIT_INSN(), 424 BPF_EXIT_INSN(),
425 }, 425 },
426 .errstr_unpriv = "R1 subtraction from stack pointer", 426 .errstr = "R1 subtraction from stack pointer",
427 .result_unpriv = REJECT,
428 .errstr = "R1 invalid mem access",
429 .result = REJECT, 427 .result = REJECT,
430 }, 428 },
431 { 429 {
@@ -607,7 +605,6 @@ static struct bpf_test tests[] = {
607 }, 605 },
608 .errstr = "misaligned stack access", 606 .errstr = "misaligned stack access",
609 .result = REJECT, 607 .result = REJECT,
610 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
611 }, 608 },
612 { 609 {
613 "invalid map_fd for function call", 610 "invalid map_fd for function call",
@@ -1798,7 +1795,6 @@ static struct bpf_test tests[] = {
1798 }, 1795 },
1799 .result = REJECT, 1796 .result = REJECT,
1800 .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8", 1797 .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
1801 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
1802 }, 1798 },
1803 { 1799 {
1804 "PTR_TO_STACK store/load - bad alignment on reg", 1800 "PTR_TO_STACK store/load - bad alignment on reg",
@@ -1811,7 +1807,6 @@ static struct bpf_test tests[] = {
1811 }, 1807 },
1812 .result = REJECT, 1808 .result = REJECT,
1813 .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8", 1809 .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
1814 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
1815 }, 1810 },
1816 { 1811 {
1817 "PTR_TO_STACK store/load - out of bounds low", 1812 "PTR_TO_STACK store/load - out of bounds low",
@@ -1863,9 +1858,8 @@ static struct bpf_test tests[] = {
1863 BPF_MOV64_IMM(BPF_REG_0, 0), 1858 BPF_MOV64_IMM(BPF_REG_0, 0),
1864 BPF_EXIT_INSN(), 1859 BPF_EXIT_INSN(),
1865 }, 1860 },
1866 .result = ACCEPT, 1861 .result = REJECT,
1867 .result_unpriv = REJECT, 1862 .errstr = "R1 pointer += pointer",
1868 .errstr_unpriv = "R1 pointer += pointer",
1869 }, 1863 },
1870 { 1864 {
1871 "unpriv: neg pointer", 1865 "unpriv: neg pointer",
@@ -2593,7 +2587,8 @@ static struct bpf_test tests[] = {
2593 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 2587 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
2594 offsetof(struct __sk_buff, data)), 2588 offsetof(struct __sk_buff, data)),
2595 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4), 2589 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
2596 BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), 2590 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
2591 offsetof(struct __sk_buff, len)),
2597 BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49), 2592 BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
2598 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49), 2593 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
2599 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), 2594 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
@@ -2900,7 +2895,7 @@ static struct bpf_test tests[] = {
2900 BPF_MOV64_IMM(BPF_REG_0, 0), 2895 BPF_MOV64_IMM(BPF_REG_0, 0),
2901 BPF_EXIT_INSN(), 2896 BPF_EXIT_INSN(),
2902 }, 2897 },
2903 .errstr = "invalid access to packet", 2898 .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
2904 .result = REJECT, 2899 .result = REJECT,
2905 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 2900 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
2906 }, 2901 },
@@ -3886,9 +3881,7 @@ static struct bpf_test tests[] = {
3886 BPF_EXIT_INSN(), 3881 BPF_EXIT_INSN(),
3887 }, 3882 },
3888 .fixup_map2 = { 3, 11 }, 3883 .fixup_map2 = { 3, 11 },
3889 .errstr_unpriv = "R0 pointer += pointer", 3884 .errstr = "R0 pointer += pointer",
3890 .errstr = "R0 invalid mem access 'inv'",
3891 .result_unpriv = REJECT,
3892 .result = REJECT, 3885 .result = REJECT,
3893 .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, 3886 .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
3894 }, 3887 },
@@ -3929,7 +3922,7 @@ static struct bpf_test tests[] = {
3929 BPF_EXIT_INSN(), 3922 BPF_EXIT_INSN(),
3930 }, 3923 },
3931 .fixup_map1 = { 4 }, 3924 .fixup_map1 = { 4 },
3932 .errstr = "R4 invalid mem access", 3925 .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
3933 .result = REJECT, 3926 .result = REJECT,
3934 .prog_type = BPF_PROG_TYPE_SCHED_CLS 3927 .prog_type = BPF_PROG_TYPE_SCHED_CLS
3935 }, 3928 },
@@ -3950,7 +3943,7 @@ static struct bpf_test tests[] = {
3950 BPF_EXIT_INSN(), 3943 BPF_EXIT_INSN(),
3951 }, 3944 },
3952 .fixup_map1 = { 4 }, 3945 .fixup_map1 = { 4 },
3953 .errstr = "R4 invalid mem access", 3946 .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
3954 .result = REJECT, 3947 .result = REJECT,
3955 .prog_type = BPF_PROG_TYPE_SCHED_CLS 3948 .prog_type = BPF_PROG_TYPE_SCHED_CLS
3956 }, 3949 },
@@ -3971,7 +3964,7 @@ static struct bpf_test tests[] = {
3971 BPF_EXIT_INSN(), 3964 BPF_EXIT_INSN(),
3972 }, 3965 },
3973 .fixup_map1 = { 4 }, 3966 .fixup_map1 = { 4 },
3974 .errstr = "R4 invalid mem access", 3967 .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
3975 .result = REJECT, 3968 .result = REJECT,
3976 .prog_type = BPF_PROG_TYPE_SCHED_CLS 3969 .prog_type = BPF_PROG_TYPE_SCHED_CLS
3977 }, 3970 },
@@ -5196,10 +5189,8 @@ static struct bpf_test tests[] = {
5196 BPF_EXIT_INSN(), 5189 BPF_EXIT_INSN(),
5197 }, 5190 },
5198 .fixup_map2 = { 3 }, 5191 .fixup_map2 = { 3 },
5199 .errstr_unpriv = "R0 bitwise operator &= on pointer", 5192 .errstr = "R0 bitwise operator &= on pointer",
5200 .errstr = "invalid mem access 'inv'",
5201 .result = REJECT, 5193 .result = REJECT,
5202 .result_unpriv = REJECT,
5203 }, 5194 },
5204 { 5195 {
5205 "map element value illegal alu op, 2", 5196 "map element value illegal alu op, 2",
@@ -5215,10 +5206,8 @@ static struct bpf_test tests[] = {
5215 BPF_EXIT_INSN(), 5206 BPF_EXIT_INSN(),
5216 }, 5207 },
5217 .fixup_map2 = { 3 }, 5208 .fixup_map2 = { 3 },
5218 .errstr_unpriv = "R0 32-bit pointer arithmetic prohibited", 5209 .errstr = "R0 32-bit pointer arithmetic prohibited",
5219 .errstr = "invalid mem access 'inv'",
5220 .result = REJECT, 5210 .result = REJECT,
5221 .result_unpriv = REJECT,
5222 }, 5211 },
5223 { 5212 {
5224 "map element value illegal alu op, 3", 5213 "map element value illegal alu op, 3",
@@ -5234,10 +5223,8 @@ static struct bpf_test tests[] = {
5234 BPF_EXIT_INSN(), 5223 BPF_EXIT_INSN(),
5235 }, 5224 },
5236 .fixup_map2 = { 3 }, 5225 .fixup_map2 = { 3 },
5237 .errstr_unpriv = "R0 pointer arithmetic with /= operator", 5226 .errstr = "R0 pointer arithmetic with /= operator",
5238 .errstr = "invalid mem access 'inv'",
5239 .result = REJECT, 5227 .result = REJECT,
5240 .result_unpriv = REJECT,
5241 }, 5228 },
5242 { 5229 {
5243 "map element value illegal alu op, 4", 5230 "map element value illegal alu op, 4",
@@ -6020,8 +6007,7 @@ static struct bpf_test tests[] = {
6020 BPF_EXIT_INSN(), 6007 BPF_EXIT_INSN(),
6021 }, 6008 },
6022 .fixup_map_in_map = { 3 }, 6009 .fixup_map_in_map = { 3 },
6023 .errstr = "R1 type=inv expected=map_ptr", 6010 .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
6024 .errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
6025 .result = REJECT, 6011 .result = REJECT,
6026 }, 6012 },
6027 { 6013 {
@@ -6118,6 +6104,30 @@ static struct bpf_test tests[] = {
6118 .result = ACCEPT, 6104 .result = ACCEPT,
6119 }, 6105 },
6120 { 6106 {
6107 "ld_abs: tests on r6 and skb data reload helper",
6108 .insns = {
6109 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
6110 BPF_LD_ABS(BPF_B, 0),
6111 BPF_LD_ABS(BPF_H, 0),
6112 BPF_LD_ABS(BPF_W, 0),
6113 BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
6114 BPF_MOV64_IMM(BPF_REG_6, 0),
6115 BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
6116 BPF_MOV64_IMM(BPF_REG_2, 1),
6117 BPF_MOV64_IMM(BPF_REG_3, 2),
6118 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6119 BPF_FUNC_skb_vlan_push),
6120 BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
6121 BPF_LD_ABS(BPF_B, 0),
6122 BPF_LD_ABS(BPF_H, 0),
6123 BPF_LD_ABS(BPF_W, 0),
6124 BPF_MOV64_IMM(BPF_REG_0, 42),
6125 BPF_EXIT_INSN(),
6126 },
6127 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
6128 .result = ACCEPT,
6129 },
6130 {
6121 "ld_ind: check calling conv, r1", 6131 "ld_ind: check calling conv, r1",
6122 .insns = { 6132 .insns = {
6123 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), 6133 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
@@ -6301,7 +6311,7 @@ static struct bpf_test tests[] = {
6301 BPF_EXIT_INSN(), 6311 BPF_EXIT_INSN(),
6302 }, 6312 },
6303 .fixup_map1 = { 3 }, 6313 .fixup_map1 = { 3 },
6304 .errstr = "R0 min value is negative", 6314 .errstr = "unbounded min value",
6305 .result = REJECT, 6315 .result = REJECT,
6306 }, 6316 },
6307 { 6317 {
@@ -6325,7 +6335,7 @@ static struct bpf_test tests[] = {
6325 BPF_EXIT_INSN(), 6335 BPF_EXIT_INSN(),
6326 }, 6336 },
6327 .fixup_map1 = { 3 }, 6337 .fixup_map1 = { 3 },
6328 .errstr = "R0 min value is negative", 6338 .errstr = "unbounded min value",
6329 .result = REJECT, 6339 .result = REJECT,
6330 }, 6340 },
6331 { 6341 {
@@ -6351,7 +6361,7 @@ static struct bpf_test tests[] = {
6351 BPF_EXIT_INSN(), 6361 BPF_EXIT_INSN(),
6352 }, 6362 },
6353 .fixup_map1 = { 3 }, 6363 .fixup_map1 = { 3 },
6354 .errstr = "R8 invalid mem access 'inv'", 6364 .errstr = "unbounded min value",
6355 .result = REJECT, 6365 .result = REJECT,
6356 }, 6366 },
6357 { 6367 {
@@ -6376,7 +6386,7 @@ static struct bpf_test tests[] = {
6376 BPF_EXIT_INSN(), 6386 BPF_EXIT_INSN(),
6377 }, 6387 },
6378 .fixup_map1 = { 3 }, 6388 .fixup_map1 = { 3 },
6379 .errstr = "R8 invalid mem access 'inv'", 6389 .errstr = "unbounded min value",
6380 .result = REJECT, 6390 .result = REJECT,
6381 }, 6391 },
6382 { 6392 {
@@ -6424,7 +6434,7 @@ static struct bpf_test tests[] = {
6424 BPF_EXIT_INSN(), 6434 BPF_EXIT_INSN(),
6425 }, 6435 },
6426 .fixup_map1 = { 3 }, 6436 .fixup_map1 = { 3 },
6427 .errstr = "R0 min value is negative", 6437 .errstr = "unbounded min value",
6428 .result = REJECT, 6438 .result = REJECT,
6429 }, 6439 },
6430 { 6440 {
@@ -6495,7 +6505,7 @@ static struct bpf_test tests[] = {
6495 BPF_EXIT_INSN(), 6505 BPF_EXIT_INSN(),
6496 }, 6506 },
6497 .fixup_map1 = { 3 }, 6507 .fixup_map1 = { 3 },
6498 .errstr = "R0 min value is negative", 6508 .errstr = "unbounded min value",
6499 .result = REJECT, 6509 .result = REJECT,
6500 }, 6510 },
6501 { 6511 {
@@ -6546,7 +6556,7 @@ static struct bpf_test tests[] = {
6546 BPF_EXIT_INSN(), 6556 BPF_EXIT_INSN(),
6547 }, 6557 },
6548 .fixup_map1 = { 3 }, 6558 .fixup_map1 = { 3 },
6549 .errstr = "R0 min value is negative", 6559 .errstr = "unbounded min value",
6550 .result = REJECT, 6560 .result = REJECT,
6551 }, 6561 },
6552 { 6562 {
@@ -6573,7 +6583,7 @@ static struct bpf_test tests[] = {
6573 BPF_EXIT_INSN(), 6583 BPF_EXIT_INSN(),
6574 }, 6584 },
6575 .fixup_map1 = { 3 }, 6585 .fixup_map1 = { 3 },
6576 .errstr = "R0 min value is negative", 6586 .errstr = "unbounded min value",
6577 .result = REJECT, 6587 .result = REJECT,
6578 }, 6588 },
6579 { 6589 {
@@ -6599,7 +6609,7 @@ static struct bpf_test tests[] = {
6599 BPF_EXIT_INSN(), 6609 BPF_EXIT_INSN(),
6600 }, 6610 },
6601 .fixup_map1 = { 3 }, 6611 .fixup_map1 = { 3 },
6602 .errstr = "R0 min value is negative", 6612 .errstr = "unbounded min value",
6603 .result = REJECT, 6613 .result = REJECT,
6604 }, 6614 },
6605 { 6615 {
@@ -6628,7 +6638,7 @@ static struct bpf_test tests[] = {
6628 BPF_EXIT_INSN(), 6638 BPF_EXIT_INSN(),
6629 }, 6639 },
6630 .fixup_map1 = { 3 }, 6640 .fixup_map1 = { 3 },
6631 .errstr = "R0 min value is negative", 6641 .errstr = "unbounded min value",
6632 .result = REJECT, 6642 .result = REJECT,
6633 }, 6643 },
6634 { 6644 {
@@ -6658,7 +6668,7 @@ static struct bpf_test tests[] = {
6658 BPF_JMP_IMM(BPF_JA, 0, 0, -7), 6668 BPF_JMP_IMM(BPF_JA, 0, 0, -7),
6659 }, 6669 },
6660 .fixup_map1 = { 4 }, 6670 .fixup_map1 = { 4 },
6661 .errstr = "R0 min value is negative", 6671 .errstr = "unbounded min value",
6662 .result = REJECT, 6672 .result = REJECT,
6663 }, 6673 },
6664 { 6674 {
@@ -6686,8 +6696,7 @@ static struct bpf_test tests[] = {
6686 BPF_EXIT_INSN(), 6696 BPF_EXIT_INSN(),
6687 }, 6697 },
6688 .fixup_map1 = { 3 }, 6698 .fixup_map1 = { 3 },
6689 .errstr_unpriv = "R0 pointer comparison prohibited", 6699 .errstr = "unbounded min value",
6690 .errstr = "R0 min value is negative",
6691 .result = REJECT, 6700 .result = REJECT,
6692 .result_unpriv = REJECT, 6701 .result_unpriv = REJECT,
6693 }, 6702 },
@@ -6743,6 +6752,462 @@ static struct bpf_test tests[] = {
6743 .result = REJECT, 6752 .result = REJECT,
6744 }, 6753 },
6745 { 6754 {
6755 "bounds check based on zero-extended MOV",
6756 .insns = {
6757 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6758 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6759 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6760 BPF_LD_MAP_FD(BPF_REG_1, 0),
6761 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6762 BPF_FUNC_map_lookup_elem),
6763 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6764 /* r2 = 0x0000'0000'ffff'ffff */
6765 BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
6766 /* r2 = 0 */
6767 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
6768 /* no-op */
6769 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
6770 /* access at offset 0 */
6771 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6772 /* exit */
6773 BPF_MOV64_IMM(BPF_REG_0, 0),
6774 BPF_EXIT_INSN(),
6775 },
6776 .fixup_map1 = { 3 },
6777 .result = ACCEPT
6778 },
6779 {
6780 "bounds check based on sign-extended MOV. test1",
6781 .insns = {
6782 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6783 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6784 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6785 BPF_LD_MAP_FD(BPF_REG_1, 0),
6786 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6787 BPF_FUNC_map_lookup_elem),
6788 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6789 /* r2 = 0xffff'ffff'ffff'ffff */
6790 BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
6791 /* r2 = 0xffff'ffff */
6792 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
6793 /* r0 = <oob pointer> */
6794 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
6795 /* access to OOB pointer */
6796 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6797 /* exit */
6798 BPF_MOV64_IMM(BPF_REG_0, 0),
6799 BPF_EXIT_INSN(),
6800 },
6801 .fixup_map1 = { 3 },
6802 .errstr = "map_value pointer and 4294967295",
6803 .result = REJECT
6804 },
6805 {
6806 "bounds check based on sign-extended MOV. test2",
6807 .insns = {
6808 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6809 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6810 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6811 BPF_LD_MAP_FD(BPF_REG_1, 0),
6812 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6813 BPF_FUNC_map_lookup_elem),
6814 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6815 /* r2 = 0xffff'ffff'ffff'ffff */
6816 BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
6817 /* r2 = 0xfff'ffff */
6818 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
6819 /* r0 = <oob pointer> */
6820 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
6821 /* access to OOB pointer */
6822 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6823 /* exit */
6824 BPF_MOV64_IMM(BPF_REG_0, 0),
6825 BPF_EXIT_INSN(),
6826 },
6827 .fixup_map1 = { 3 },
6828 .errstr = "R0 min value is outside of the array range",
6829 .result = REJECT
6830 },
6831 {
6832 "bounds check based on reg_off + var_off + insn_off. test1",
6833 .insns = {
6834 BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
6835 offsetof(struct __sk_buff, mark)),
6836 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6837 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6838 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6839 BPF_LD_MAP_FD(BPF_REG_1, 0),
6840 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6841 BPF_FUNC_map_lookup_elem),
6842 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6843 BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
6844 BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
6845 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
6846 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
6847 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
6848 BPF_MOV64_IMM(BPF_REG_0, 0),
6849 BPF_EXIT_INSN(),
6850 },
6851 .fixup_map1 = { 4 },
6852 .errstr = "value_size=8 off=1073741825",
6853 .result = REJECT,
6854 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
6855 },
6856 {
6857 "bounds check based on reg_off + var_off + insn_off. test2",
6858 .insns = {
6859 BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
6860 offsetof(struct __sk_buff, mark)),
6861 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6862 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6863 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6864 BPF_LD_MAP_FD(BPF_REG_1, 0),
6865 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6866 BPF_FUNC_map_lookup_elem),
6867 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6868 BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
6869 BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
6870 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
6871 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
6872 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
6873 BPF_MOV64_IMM(BPF_REG_0, 0),
6874 BPF_EXIT_INSN(),
6875 },
6876 .fixup_map1 = { 4 },
6877 .errstr = "value 1073741823",
6878 .result = REJECT,
6879 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
6880 },
6881 {
6882 "bounds check after truncation of non-boundary-crossing range",
6883 .insns = {
6884 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6885 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6886 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6887 BPF_LD_MAP_FD(BPF_REG_1, 0),
6888 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6889 BPF_FUNC_map_lookup_elem),
6890 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
6891 /* r1 = [0x00, 0xff] */
6892 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
6893 BPF_MOV64_IMM(BPF_REG_2, 1),
6894 /* r2 = 0x10'0000'0000 */
6895 BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
6896 /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
6897 BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
6898 /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
6899 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
6900 /* r1 = [0x00, 0xff] */
6901 BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
6902 /* r1 = 0 */
6903 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
6904 /* no-op */
6905 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
6906 /* access at offset 0 */
6907 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6908 /* exit */
6909 BPF_MOV64_IMM(BPF_REG_0, 0),
6910 BPF_EXIT_INSN(),
6911 },
6912 .fixup_map1 = { 3 },
6913 .result = ACCEPT
6914 },
6915 {
6916 "bounds check after truncation of boundary-crossing range (1)",
6917 .insns = {
6918 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6919 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6920 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6921 BPF_LD_MAP_FD(BPF_REG_1, 0),
6922 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6923 BPF_FUNC_map_lookup_elem),
6924 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
6925 /* r1 = [0x00, 0xff] */
6926 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
6927 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6928 /* r1 = [0xffff'ff80, 0x1'0000'007f] */
6929 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6930 /* r1 = [0xffff'ff80, 0xffff'ffff] or
6931 * [0x0000'0000, 0x0000'007f]
6932 */
6933 BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
6934 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6935 /* r1 = [0x00, 0xff] or
6936 * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
6937 */
6938 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6939 /* r1 = 0 or
6940 * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
6941 */
6942 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
6943 /* no-op or OOB pointer computation */
6944 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
6945 /* potentially OOB access */
6946 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6947 /* exit */
6948 BPF_MOV64_IMM(BPF_REG_0, 0),
6949 BPF_EXIT_INSN(),
6950 },
6951 .fixup_map1 = { 3 },
6952 /* not actually fully unbounded, but the bound is very high */
6953 .errstr = "R0 unbounded memory access",
6954 .result = REJECT
6955 },
6956 {
6957 "bounds check after truncation of boundary-crossing range (2)",
6958 .insns = {
6959 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6960 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6961 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6962 BPF_LD_MAP_FD(BPF_REG_1, 0),
6963 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6964 BPF_FUNC_map_lookup_elem),
6965 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
6966 /* r1 = [0x00, 0xff] */
6967 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
6968 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6969 /* r1 = [0xffff'ff80, 0x1'0000'007f] */
6970 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6971 /* r1 = [0xffff'ff80, 0xffff'ffff] or
6972 * [0x0000'0000, 0x0000'007f]
6973 * difference to previous test: truncation via MOV32
6974 * instead of ALU32.
6975 */
6976 BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
6977 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6978 /* r1 = [0x00, 0xff] or
6979 * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
6980 */
6981 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6982 /* r1 = 0 or
6983 * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
6984 */
6985 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
6986 /* no-op or OOB pointer computation */
6987 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
6988 /* potentially OOB access */
6989 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6990 /* exit */
6991 BPF_MOV64_IMM(BPF_REG_0, 0),
6992 BPF_EXIT_INSN(),
6993 },
6994 .fixup_map1 = { 3 },
6995 /* not actually fully unbounded, but the bound is very high */
6996 .errstr = "R0 unbounded memory access",
6997 .result = REJECT
6998 },
6999 {
7000 "bounds check after wrapping 32-bit addition",
7001 .insns = {
7002 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7003 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7004 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7005 BPF_LD_MAP_FD(BPF_REG_1, 0),
7006 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7007 BPF_FUNC_map_lookup_elem),
7008 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
7009 /* r1 = 0x7fff'ffff */
7010 BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
7011 /* r1 = 0xffff'fffe */
7012 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
7013 /* r1 = 0 */
7014 BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
7015 /* no-op */
7016 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7017 /* access at offset 0 */
7018 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
7019 /* exit */
7020 BPF_MOV64_IMM(BPF_REG_0, 0),
7021 BPF_EXIT_INSN(),
7022 },
7023 .fixup_map1 = { 3 },
7024 .result = ACCEPT
7025 },
7026 {
7027 "bounds check after shift with oversized count operand",
7028 .insns = {
7029 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7030 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7031 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7032 BPF_LD_MAP_FD(BPF_REG_1, 0),
7033 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7034 BPF_FUNC_map_lookup_elem),
7035 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
7036 BPF_MOV64_IMM(BPF_REG_2, 32),
7037 BPF_MOV64_IMM(BPF_REG_1, 1),
7038 /* r1 = (u32)1 << (u32)32 = ? */
7039 BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
7040 /* r1 = [0x0000, 0xffff] */
7041 BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
7042 /* computes unknown pointer, potentially OOB */
7043 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7044 /* potentially OOB access */
7045 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
7046 /* exit */
7047 BPF_MOV64_IMM(BPF_REG_0, 0),
7048 BPF_EXIT_INSN(),
7049 },
7050 .fixup_map1 = { 3 },
7051 .errstr = "R0 max value is outside of the array range",
7052 .result = REJECT
7053 },
7054 {
7055 "bounds check after right shift of maybe-negative number",
7056 .insns = {
7057 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7058 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7059 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7060 BPF_LD_MAP_FD(BPF_REG_1, 0),
7061 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7062 BPF_FUNC_map_lookup_elem),
7063 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
7064 /* r1 = [0x00, 0xff] */
7065 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
7066 /* r1 = [-0x01, 0xfe] */
7067 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
7068 /* r1 = 0 or 0xff'ffff'ffff'ffff */
7069 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
7070 /* r1 = 0 or 0xffff'ffff'ffff */
7071 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
7072 /* computes unknown pointer, potentially OOB */
7073 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7074 /* potentially OOB access */
7075 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
7076 /* exit */
7077 BPF_MOV64_IMM(BPF_REG_0, 0),
7078 BPF_EXIT_INSN(),
7079 },
7080 .fixup_map1 = { 3 },
7081 .errstr = "R0 unbounded memory access",
7082 .result = REJECT
7083 },
7084 {
7085 "bounds check map access with off+size signed 32bit overflow. test1",
7086 .insns = {
7087 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7088 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7089 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7090 BPF_LD_MAP_FD(BPF_REG_1, 0),
7091 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7092 BPF_FUNC_map_lookup_elem),
7093 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7094 BPF_EXIT_INSN(),
7095 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
7096 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7097 BPF_JMP_A(0),
7098 BPF_EXIT_INSN(),
7099 },
7100 .fixup_map1 = { 3 },
7101 .errstr = "map_value pointer and 2147483646",
7102 .result = REJECT
7103 },
7104 {
7105 "bounds check map access with off+size signed 32bit overflow. test2",
7106 .insns = {
7107 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7108 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7109 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7110 BPF_LD_MAP_FD(BPF_REG_1, 0),
7111 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7112 BPF_FUNC_map_lookup_elem),
7113 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7114 BPF_EXIT_INSN(),
7115 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
7116 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
7117 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
7118 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7119 BPF_JMP_A(0),
7120 BPF_EXIT_INSN(),
7121 },
7122 .fixup_map1 = { 3 },
7123 .errstr = "pointer offset 1073741822",
7124 .result = REJECT
7125 },
7126 {
7127 "bounds check map access with off+size signed 32bit overflow. test3",
7128 .insns = {
7129 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7130 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7131 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7132 BPF_LD_MAP_FD(BPF_REG_1, 0),
7133 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7134 BPF_FUNC_map_lookup_elem),
7135 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7136 BPF_EXIT_INSN(),
7137 BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
7138 BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
7139 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
7140 BPF_JMP_A(0),
7141 BPF_EXIT_INSN(),
7142 },
7143 .fixup_map1 = { 3 },
7144 .errstr = "pointer offset -1073741822",
7145 .result = REJECT
7146 },
7147 {
7148 "bounds check map access with off+size signed 32bit overflow. test4",
7149 .insns = {
7150 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7151 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7152 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7153 BPF_LD_MAP_FD(BPF_REG_1, 0),
7154 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7155 BPF_FUNC_map_lookup_elem),
7156 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7157 BPF_EXIT_INSN(),
7158 BPF_MOV64_IMM(BPF_REG_1, 1000000),
7159 BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
7160 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7161 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
7162 BPF_JMP_A(0),
7163 BPF_EXIT_INSN(),
7164 },
7165 .fixup_map1 = { 3 },
7166 .errstr = "map_value pointer and 1000000000000",
7167 .result = REJECT
7168 },
7169 {
7170 "pointer/scalar confusion in state equality check (way 1)",
7171 .insns = {
7172 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7173 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7174 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7175 BPF_LD_MAP_FD(BPF_REG_1, 0),
7176 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7177 BPF_FUNC_map_lookup_elem),
7178 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
7179 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7180 BPF_JMP_A(1),
7181 BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
7182 BPF_JMP_A(0),
7183 BPF_EXIT_INSN(),
7184 },
7185 .fixup_map1 = { 3 },
7186 .result = ACCEPT,
7187 .result_unpriv = REJECT,
7188 .errstr_unpriv = "R0 leaks addr as return value"
7189 },
7190 {
7191 "pointer/scalar confusion in state equality check (way 2)",
7192 .insns = {
7193 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7194 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7195 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7196 BPF_LD_MAP_FD(BPF_REG_1, 0),
7197 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7198 BPF_FUNC_map_lookup_elem),
7199 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
7200 BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
7201 BPF_JMP_A(1),
7202 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7203 BPF_EXIT_INSN(),
7204 },
7205 .fixup_map1 = { 3 },
7206 .result = ACCEPT,
7207 .result_unpriv = REJECT,
7208 .errstr_unpriv = "R0 leaks addr as return value"
7209 },
7210 {
6746 "variable-offset ctx access", 7211 "variable-offset ctx access",
6747 .insns = { 7212 .insns = {
6748 /* Get an unknown value */ 7213 /* Get an unknown value */
@@ -6784,6 +7249,71 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_LWT_IN,
 	},
 	{
+		"indirect variable-offset stack access",
+		.insns = {
+			/* Fill the top 8 bytes of the stack */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			/* Get an unknown value */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+			/* Make it small and 4-byte aligned */
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+			/* add it to fp.  We now have either fp-4 or fp-8, but
+			 * we don't know which
+			 */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+			/* dereference it indirectly */
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 5 },
+		.errstr = "variable stack read R2",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"direct stack access with 32-bit wraparound. test1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_EXIT_INSN()
+		},
+		.errstr = "fp pointer and 2147483647",
+		.result = REJECT
+	},
+	{
+		"direct stack access with 32-bit wraparound. test2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_EXIT_INSN()
+		},
+		.errstr = "fp pointer and 1073741823",
+		.result = REJECT
+	},
+	{
+		"direct stack access with 32-bit wraparound. test3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_EXIT_INSN()
+		},
+		.errstr = "fp pointer offset 1073741822",
+		.result = REJECT
+	},
+	{
 		"liveness pruning and write screening",
 		.insns = {
 			/* Get an unknown value */
@@ -7105,6 +7635,19 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
+		"pkt_end - pkt_start is allowed",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
 		"XDP pkt read, pkt_end mangling, bad access 1",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -7119,7 +7662,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "R1 offset is outside of the packet",
+		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
@@ -7138,7 +7681,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "R1 offset is outside of the packet",
+		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
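For context, the new "pkt_end - pkt_start is allowed" entry above exercises, at the eBPF-instruction level, a pattern that C programs attached with tc commonly rely on: subtracting the packet-start pointer from the packet-end pointer to obtain the packet length as a plain scalar. The sketch below is illustrative only and is not part of this patch; the local SEC() macro, the "classifier" section name, and the 14-byte (Ethernet header) length check are assumptions.

/* pkt_len.c - minimal sketch of the pkt_end - pkt_start pattern.
 * Build (assumed): clang -O2 -target bpf -c pkt_len.c -o pkt_len.o
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>

#define SEC(name) __attribute__((section(name), used))

SEC("classifier")
int pkt_len(struct __sk_buff *skb)
{
	/* data and data_end are exposed as 32-bit fields but are really
	 * pointers into the packet. */
	void *data     = (void *)(long)skb->data;
	void *data_end = (void *)(long)skb->data_end;

	/* pkt_end - pkt_start: the verifier treats the result as a scalar,
	 * so it can be compared or returned freely. */
	if (data_end - data < 14)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";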
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index e57b4ac40e72..7177bea1fdfa 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,3 +1,4 @@
 CONFIG_USER_NS=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_TEST_BPF=m
+CONFIG_NUMA=y
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index f9555b1e7f15..cc29a8148328 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -92,16 +92,23 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
 	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
 	struct arch_timer_context *vtimer;
+	u32 cnt_ctl;
 
-	if (!vcpu) {
-		pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
-		return IRQ_NONE;
-	}
-	vtimer = vcpu_vtimer(vcpu);
+	/*
+	 * We may see a timer interrupt after vcpu_put() has been called which
+	 * sets the CPU's vcpu pointer to NULL, because even though the timer
+	 * has been disabled in vtimer_save_state(), the hardware interrupt
+	 * signal may not have been retired from the interrupt controller yet.
+	 */
+	if (!vcpu)
+		return IRQ_HANDLED;
 
+	vtimer = vcpu_vtimer(vcpu);
 	if (!vtimer->irq.level) {
-		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-		if (kvm_timer_irq_can_fire(vtimer))
+		cnt_ctl = read_sysreg_el0(cntv_ctl);
+		cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
+			   ARCH_TIMER_CTRL_IT_MASK;
+		if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
 			kvm_timer_update_irq(vcpu, true, vtimer);
 	}
 
@@ -355,6 +362,7 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
 
 	/* Disable the virtual timer */
 	write_sysreg_el0(0, cntv_ctl);
+	isb();
 
 	vtimer->loaded = false;
 out:
@@ -720,7 +728,7 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
 	return 0;
 }
 
-int kvm_timer_hyp_init(void)
+int kvm_timer_hyp_init(bool has_gic)
 {
 	struct arch_timer_kvm_info *info;
 	int err;
@@ -756,10 +764,13 @@ int kvm_timer_hyp_init(void)
 		return err;
 	}
 
-	err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus());
-	if (err) {
-		kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
-		goto out_free_irq;
+	if (has_gic) {
+		err = irq_set_vcpu_affinity(host_vtimer_irq,
+					    kvm_get_running_vcpus());
+		if (err) {
+			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
+			goto out_free_irq;
+		}
 	}
 
 	kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
@@ -835,10 +846,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 no_vgic:
 	preempt_disable();
 	timer->enabled = 1;
-	if (!irqchip_in_kernel(vcpu->kvm))
-		kvm_timer_vcpu_load_user(vcpu);
-	else
-		kvm_timer_vcpu_load_vgic(vcpu);
+	kvm_timer_vcpu_load(vcpu);
 	preempt_enable();
 
 	return 0;
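The kvm_arch_timer_handler() hunk above replaces the kvm_timer_irq_can_fire() check with a direct read of CNTV_CTL: the interrupt is treated as genuinely pending only when the timer is enabled, its status bit is set, and its output is not masked. Below is a standalone sketch of that predicate, with bit values that mirror the architectural CNTx_CTL layout (ENABLE = bit 0, IMASK = bit 1, ISTATUS = bit 2); the macro and function names are illustrative and not taken from the kernel headers.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative mirrors of the ARM generic timer CNTx_CTL bits. */
#define TIMER_CTL_ENABLE	(1u << 0)	/* timer enabled */
#define TIMER_CTL_IMASK		(1u << 1)	/* interrupt output masked */
#define TIMER_CTL_ISTATUS	(1u << 2)	/* interrupt condition met */

/* True only when the timer is enabled, the interrupt condition is met,
 * and the output is not masked, i.e. the IRQ line really is firing. */
static bool timer_irq_is_firing(uint32_t cnt_ctl)
{
	cnt_ctl &= TIMER_CTL_ENABLE | TIMER_CTL_ISTATUS | TIMER_CTL_IMASK;
	return cnt_ctl == (TIMER_CTL_ENABLE | TIMER_CTL_ISTATUS);
}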
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 6b60c98a6e22..2e43f9d42bd5 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1326,7 +1326,7 @@ static int init_subsystems(void)
 	/*
 	 * Init HYP architected timer support
 	 */
-	err = kvm_timer_hyp_init();
+	err = kvm_timer_hyp_init(vgic_present);
 	if (err)
 		goto out;
 
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index b6e715fd3c90..dac7ceb1a677 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		}
 
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
-			       data);
+			       &data);
 		data = vcpu_data_host_to_guest(vcpu, data, len);
 		vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
 	}
@@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
 					       len);
 
-		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
 		kvm_mmio_write_buf(data_buf, len, data);
 
 		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
 				       data_buf);
 	} else {
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
-			       fault_ipa, 0);
+			       fault_ipa, NULL);
 
 		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
 				      data_buf);
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index b36945d49986..b4b69c2d1012 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -509,8 +509,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
  */
 void free_hyp_pgds(void)
 {
-	unsigned long addr;
-
 	mutex_lock(&kvm_hyp_pgd_mutex);
 
 	if (boot_hyp_pgd) {
@@ -521,10 +519,10 @@ void free_hyp_pgds(void)
 
 	if (hyp_pgd) {
 		unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
-		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
+		unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
+				(uintptr_t)high_memory - PAGE_OFFSET);
+		unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
+				VMALLOC_END - VMALLOC_START);
 
 		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;