aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2017-10-02 19:35:04 -0400
committerDave Airlie <airlied@redhat.com>2017-10-02 19:35:04 -0400
commitebec44a2456fbe5fe18aae88f6010f6878f0cb4a (patch)
tree427734722bdf3e807333329f33a6dbd6e95ec747
parent659333de48268550b5f09fcd45f76459d737b946 (diff)
parent9e66317d3c92ddaab330c125dfe9d06eee268aff (diff)
BackMerge tag 'v4.14-rc3' into drm-next
Linux 4.14-rc3 Requested by Daniel for the tracing build fix in fixes.
-rw-r--r--Documentation/cpu-freq/index.txt2
-rw-r--r--Documentation/devicetree/bindings/leds/ams,as3645a.txt28
-rw-r--r--MAINTAINERS6
-rw-r--r--Makefile6
-rw-r--r--arch/arm/boot/dts/omap3-n950-n9.dtsi10
-rw-r--r--arch/arm64/include/asm/pgtable.h2
-rw-r--r--arch/arm64/kernel/head.S1
-rw-r--r--arch/arm64/mm/fault.c2
-rw-r--r--arch/microblaze/Kconfig2
-rw-r--r--arch/microblaze/include/uapi/asm/Kbuild1
-rw-r--r--arch/microblaze/kernel/dma.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S14
-rw-r--r--arch/um/kernel/time.c4
-rw-r--r--arch/x86/events/intel/cstate.c4
-rw-r--r--arch/x86/events/intel/rapl.c3
-rw-r--r--arch/x86/events/intel/uncore_snbep.c4
-rw-r--r--arch/x86/events/msr.c8
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/asm.h8
-rw-r--r--arch/x86/include/asm/fpu/internal.h90
-rw-r--r--arch/x86/include/asm/fpu/types.h32
-rw-r--r--arch/x86/include/asm/fpu/xstate.h12
-rw-r--r--arch/x86/include/asm/thread_info.h11
-rw-r--r--arch/x86/include/asm/trace/fpu.h11
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/include/asm/xen/hypercall.h4
-rw-r--r--arch/x86/kernel/fpu/core.c155
-rw-r--r--arch/x86/kernel/fpu/init.c2
-rw-r--r--arch/x86/kernel/fpu/regset.c48
-rw-r--r--arch/x86/kernel/fpu/signal.c37
-rw-r--r--arch/x86/kernel/fpu/xstate.c264
-rw-r--r--arch/x86/kernel/irq_32.c6
-rw-r--r--arch/x86/kernel/ksysfs.c2
-rw-r--r--arch/x86/kernel/kvm.c3
-rw-r--r--arch/x86/kernel/signal.c6
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kvm/vmx.c206
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/math-emu/fpu_entry.c2
-rw-r--r--arch/x86/mm/extable.c24
-rw-r--r--arch/x86/mm/fault.c47
-rw-r--r--arch/x86/mm/mem_encrypt.c2
-rw-r--r--arch/x86/mm/pkeys.c3
-rw-r--r--arch/x86/mm/tlb.c2
-rw-r--r--arch/x86/xen/mmu_pv.c13
-rw-r--r--block/blk-core.c3
-rw-r--r--block/bsg-lib.c1
-rw-r--r--block/partition-generic.c2
-rw-r--r--drivers/acpi/apei/ghes.c16
-rw-r--r--drivers/base/power/opp/core.c7
-rw-r--r--drivers/block/brd.c2
-rw-r--r--drivers/block/loop.h6
-rw-r--r--drivers/block/nbd.c6
-rw-r--r--drivers/clocksource/numachip.c2
-rw-r--r--drivers/cpufreq/cpufreq-dt-platdev.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c189
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c20
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h1
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem.c3
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c6
-rw-r--r--drivers/gpu/drm/qxl/qxl_display.c41
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c2
-rw-r--r--drivers/gpu/drm/sun4i/Kconfig2
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi.h2
-rw-r--r--drivers/gpu/drm/tegra/trace.h2
-rw-r--r--drivers/infiniband/core/security.c4
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c14
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c101
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h3
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.c20
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c41
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c50
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c4
-rw-r--r--drivers/infiniband/hw/mlx5/main.c10
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c47
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c27
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c13
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c15
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c30
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c2
-rw-r--r--drivers/iommu/amd_iommu_init.c8
-rw-r--r--drivers/iommu/io-pgtable-arm-v7s.c2
-rw-r--r--drivers/iommu/mtk_iommu.c3
-rw-r--r--drivers/irqchip/irq-mips-gic.c13
-rw-r--r--drivers/leds/leds-as3645a.c29
-rw-r--r--drivers/md/dm-raid.c2
-rw-r--r--drivers/md/md.c72
-rw-r--r--drivers/md/md.h1
-rw-r--r--drivers/md/raid5.c7
-rw-r--r--drivers/mmc/host/sdhci-pci-core.c15
-rw-r--r--drivers/mmc/host/tmio_mmc_core.c47
-rw-r--r--drivers/mtd/mtdpart.c8
-rw-r--r--drivers/mtd/nand/atmel/pmecc.c2
-rw-r--r--drivers/nvme/host/core.c9
-rw-r--r--drivers/nvme/host/fabrics.c18
-rw-r--r--drivers/nvme/host/fc.c21
-rw-r--r--drivers/nvme/host/pci.c34
-rw-r--r--drivers/nvme/host/rdma.c9
-rw-r--r--drivers/nvme/target/core.c9
-rw-r--r--drivers/nvme/target/fabrics-cmd.c9
-rw-r--r--drivers/nvme/target/fc.c24
-rw-r--r--drivers/nvme/target/fcloop.c104
-rw-r--r--drivers/nvme/target/nvmet.h1
-rw-r--r--drivers/pci/pci-sysfs.c11
-rw-r--r--drivers/platform/x86/fujitsu-laptop.c10
-rw-r--r--drivers/scsi/aacraid/aachba.c12
-rw-r--r--drivers/scsi/aacraid/aacraid.h5
-rw-r--r--drivers/scsi/aacraid/linit.c20
-rw-r--r--drivers/scsi/aacraid/src.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c1
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.c2
-rw-r--r--drivers/scsi/qla2xxx/qla_nvme.c2
-rw-r--r--drivers/scsi/scsi_error.c3
-rw-r--r--drivers/scsi/scsi_transport_fc.c14
-rw-r--r--drivers/scsi/scsi_transport_iscsi.c2
-rw-r--r--drivers/xen/xen-pciback/conf_space_header.c11
-rw-r--r--fs/btrfs/compression.c18
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/disk-io.c9
-rw-r--r--fs/btrfs/extent_io.c8
-rw-r--r--fs/btrfs/inode.c27
-rw-r--r--fs/btrfs/ioctl.c12
-rw-r--r--fs/btrfs/qgroup.c6
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/send.c2
-rw-r--r--fs/btrfs/tree-log.c12
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/direct-io.c49
-rw-r--r--fs/gfs2/glock.c14
-rw-r--r--fs/iomap.c43
-rw-r--r--fs/isofs/inode.c2
-rw-r--r--fs/proc/array.c35
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/quota/quota_v2.c4
-rw-r--r--fs/read_write.c4
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c12
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c17
-rw-r--r--fs/xfs/xfs_aops.c3
-rw-r--r--fs/xfs/xfs_bmap_util.c14
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_error.c2
-rw-r--r--fs/xfs/xfs_file.c39
-rw-r--r--fs/xfs/xfs_inode.c8
-rw-r--r--fs/xfs/xfs_inode_item.c2
-rw-r--r--fs/xfs/xfs_ioctl.c3
-rw-r--r--fs/xfs/xfs_iomap.c7
-rw-r--r--fs/xfs/xfs_iomap.h2
-rw-r--r--fs/xfs/xfs_pnfs.c2
-rw-r--r--fs/xfs/xfs_super.c10
-rw-r--r--include/linux/blkdev.h1
-rw-r--r--include/linux/cpuhotplug.h21
-rw-r--r--include/linux/iommu.h2
-rw-r--r--include/linux/key.h2
-rw-r--r--include/linux/nvme-fc-driver.h13
-rw-r--r--include/linux/nvme.h19
-rw-r--r--include/linux/pci.h2
-rw-r--r--include/linux/sched.h64
-rw-r--r--include/linux/timer.h14
-rw-r--r--include/rdma/ib_verbs.h6
-rw-r--r--include/trace/events/sched.h19
-rw-r--r--include/uapi/rdma/ib_user_verbs.h2
-rw-r--r--ipc/shm.c2
-rw-r--r--kernel/cpu.c506
-rw-r--r--kernel/events/ring_buffer.c20
-rw-r--r--kernel/exit.c23
-rw-r--r--kernel/extable.c45
-rw-r--r--kernel/futex.c33
-rw-r--r--kernel/irq/generic-chip.c1
-rw-r--r--kernel/irq/irqdomain.c4
-rw-r--r--kernel/irq/manage.c4
-rw-r--r--kernel/locking/rwsem-xadd.c27
-rw-r--r--kernel/rcu/tree.c10
-rw-r--r--kernel/sched/core.c24
-rw-r--r--kernel/sched/debug.c2
-rw-r--r--kernel/seccomp.c23
-rw-r--r--kernel/sysctl.c3
-rw-r--r--kernel/trace/blktrace.c18
-rw-r--r--kernel/trace/trace_output.c21
-rw-r--r--kernel/trace/trace_sched_wakeup.c8
-rw-r--r--kernel/trace/trace_stack.c15
-rw-r--r--mm/filemap.c10
-rw-r--r--net/bluetooth/Kconfig10
-rw-r--r--net/bluetooth/hci_sock.c6
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c2
-rw-r--r--scripts/Makefile.build2
-rw-r--r--security/keys/Kconfig4
-rw-r--r--security/keys/big_key.c139
-rw-r--r--security/keys/internal.h2
-rw-r--r--security/keys/key.c6
-rw-r--r--security/keys/keyctl.c13
-rw-r--r--security/keys/keyring.c37
-rw-r--r--security/keys/proc.c8
-rw-r--r--security/keys/process_keys.c6
-rw-r--r--security/keys/request_key_auth.c74
-rw-r--r--tools/arch/s390/include/uapi/asm/kvm.h6
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h2
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h4
-rw-r--r--tools/include/asm-generic/hugetlb_encode.h34
-rw-r--r--tools/include/uapi/asm-generic/mman-common.h14
-rw-r--r--tools/include/uapi/drm/drm.h22
-rw-r--r--tools/include/uapi/drm/i915_drm.h51
-rw-r--r--tools/include/uapi/linux/bpf.h32
-rw-r--r--tools/include/uapi/linux/kvm.h3
-rw-r--r--tools/include/uapi/linux/mman.h24
-rw-r--r--tools/objtool/arch/x86/decode.c11
-rw-r--r--tools/perf/MANIFEST87
-rw-r--r--tools/perf/arch/s390/util/Build1
-rw-r--r--tools/perf/arch/s390/util/sym-handling.c29
-rw-r--r--tools/perf/util/callchain.c35
-rw-r--r--tools/perf/util/evsel.c7
-rw-r--r--tools/perf/util/symbol-elf.c8
-rw-r--r--tools/perf/util/symbol.h3
-rw-r--r--tools/perf/util/syscalltbl.c2
-rw-r--r--tools/testing/selftests/Makefile18
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h17
-rw-r--r--tools/testing/selftests/breakpoints/Makefile8
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc2
-rw-r--r--tools/testing/selftests/futex/Makefile9
-rw-r--r--tools/testing/selftests/intel_pstate/Makefile2
-rwxr-xr-xtools/testing/selftests/intel_pstate/run.sh11
-rw-r--r--tools/testing/selftests/lib.mk48
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/memfd/run_tests.sh0
-rw-r--r--tools/testing/selftests/mqueue/Makefile4
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile6
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c2
-rwxr-xr-xtools/testing/selftests/net/netdevice.sh2
-rw-r--r--tools/testing/selftests/net/reuseaddr_conflict.c114
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c18
-rw-r--r--tools/testing/selftests/sigaltstack/sas.c4
-rw-r--r--tools/testing/selftests/sync/Makefile24
-rw-r--r--tools/testing/selftests/timers/set-timer-lat.c13
-rw-r--r--tools/testing/selftests/watchdog/Makefile7
236 files changed, 2887 insertions, 1794 deletions
diff --git a/Documentation/cpu-freq/index.txt b/Documentation/cpu-freq/index.txt
index 03a7cee6ac73..c15e75386a05 100644
--- a/Documentation/cpu-freq/index.txt
+++ b/Documentation/cpu-freq/index.txt
@@ -32,8 +32,6 @@ cpufreq-stats.txt - General description of sysfs cpufreq stats.
32 32
33index.txt - File index, Mailing list and Links (this document) 33index.txt - File index, Mailing list and Links (this document)
34 34
35intel-pstate.txt - Intel pstate cpufreq driver specific file.
36
37pcc-cpufreq.txt - PCC cpufreq driver specific file. 35pcc-cpufreq.txt - PCC cpufreq driver specific file.
38 36
39 37
diff --git a/Documentation/devicetree/bindings/leds/ams,as3645a.txt b/Documentation/devicetree/bindings/leds/ams,as3645a.txt
index 12c5ef26ec73..fdc40e354a64 100644
--- a/Documentation/devicetree/bindings/leds/ams,as3645a.txt
+++ b/Documentation/devicetree/bindings/leds/ams,as3645a.txt
@@ -15,11 +15,14 @@ Required properties
15 15
16compatible : Must be "ams,as3645a". 16compatible : Must be "ams,as3645a".
17reg : The I2C address of the device. Typically 0x30. 17reg : The I2C address of the device. Typically 0x30.
18#address-cells : 1
19#size-cells : 0
18 20
19 21
20Required properties of the "flash" child node 22Required properties of the flash child node (0)
21============================================= 23===============================================
22 24
25reg: 0
23flash-timeout-us: Flash timeout in microseconds. The value must be in 26flash-timeout-us: Flash timeout in microseconds. The value must be in
24 the range [100000, 850000] and divisible by 50000. 27 the range [100000, 850000] and divisible by 50000.
25flash-max-microamp: Maximum flash current in microamperes. Has to be 28flash-max-microamp: Maximum flash current in microamperes. Has to be
@@ -33,20 +36,21 @@ ams,input-max-microamp: Maximum flash controller input current. The
33 and divisible by 50000. 36 and divisible by 50000.
34 37
35 38
36Optional properties of the "flash" child node 39Optional properties of the flash child node
37============================================= 40===========================================
38 41
39label : The label of the flash LED. 42label : The label of the flash LED.
40 43
41 44
42Required properties of the "indicator" child node 45Required properties of the indicator child node (1)
43================================================= 46===================================================
44 47
48reg: 1
45led-max-microamp: Maximum indicator current. The allowed values are 49led-max-microamp: Maximum indicator current. The allowed values are
46 2500, 5000, 7500 and 10000. 50 2500, 5000, 7500 and 10000.
47 51
48Optional properties of the "indicator" child node 52Optional properties of the indicator child node
49================================================= 53===============================================
50 54
51label : The label of the indicator LED. 55label : The label of the indicator LED.
52 56
@@ -55,16 +59,20 @@ Example
55======= 59=======
56 60
57 as3645a@30 { 61 as3645a@30 {
62 #address-cells: 1
63 #size-cells: 0
58 reg = <0x30>; 64 reg = <0x30>;
59 compatible = "ams,as3645a"; 65 compatible = "ams,as3645a";
60 flash { 66 flash@0 {
67 reg = <0x0>;
61 flash-timeout-us = <150000>; 68 flash-timeout-us = <150000>;
62 flash-max-microamp = <320000>; 69 flash-max-microamp = <320000>;
63 led-max-microamp = <60000>; 70 led-max-microamp = <60000>;
64 ams,input-max-microamp = <1750000>; 71 ams,input-max-microamp = <1750000>;
65 label = "as3645a:flash"; 72 label = "as3645a:flash";
66 }; 73 };
67 indicator { 74 indicator@1 {
75 reg = <0x1>;
68 led-max-microamp = <10000>; 76 led-max-microamp = <10000>;
69 label = "as3645a:indicator"; 77 label = "as3645a:indicator";
70 }; 78 };
diff --git a/MAINTAINERS b/MAINTAINERS
index 7ab4c373c370..d2f6ec2992f1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8603,6 +8603,12 @@ M: Sean Wang <sean.wang@mediatek.com>
8603S: Maintained 8603S: Maintained
8604F: drivers/media/rc/mtk-cir.c 8604F: drivers/media/rc/mtk-cir.c
8605 8605
8606MEDIATEK PMIC LED DRIVER
8607M: Sean Wang <sean.wang@mediatek.com>
8608S: Maintained
8609F: drivers/leds/leds-mt6323.c
8610F: Documentation/devicetree/bindings/leds/leds-mt6323.txt
8611
8606MEDIATEK ETHERNET DRIVER 8612MEDIATEK ETHERNET DRIVER
8607M: Felix Fietkau <nbd@openwrt.org> 8613M: Felix Fietkau <nbd@openwrt.org>
8608M: John Crispin <john@phrozen.org> 8614M: John Crispin <john@phrozen.org>
diff --git a/Makefile b/Makefile
index d1119941261c..cf007a31d575 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 4 1VERSION = 4
2PATCHLEVEL = 14 2PATCHLEVEL = 14
3SUBLEVEL = 0 3SUBLEVEL = 0
4EXTRAVERSION = -rc2 4EXTRAVERSION = -rc3
5NAME = Fearless Coyote 5NAME = Fearless Coyote
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
@@ -1172,11 +1172,11 @@ headers_check: headers_install
1172 1172
1173PHONY += kselftest 1173PHONY += kselftest
1174kselftest: 1174kselftest:
1175 $(Q)$(MAKE) -C tools/testing/selftests run_tests 1175 $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests run_tests
1176 1176
1177PHONY += kselftest-clean 1177PHONY += kselftest-clean
1178kselftest-clean: 1178kselftest-clean:
1179 $(Q)$(MAKE) -C tools/testing/selftests clean 1179 $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests clean
1180 1180
1181PHONY += kselftest-merge 1181PHONY += kselftest-merge
1182kselftest-merge: 1182kselftest-merge:
diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi
index cb47ae79a5f9..1b0bd72945f2 100644
--- a/arch/arm/boot/dts/omap3-n950-n9.dtsi
+++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi
@@ -267,15 +267,19 @@
267 clock-frequency = <400000>; 267 clock-frequency = <400000>;
268 268
269 as3645a@30 { 269 as3645a@30 {
270 #address-cells = <1>;
271 #size-cells = <0>;
270 reg = <0x30>; 272 reg = <0x30>;
271 compatible = "ams,as3645a"; 273 compatible = "ams,as3645a";
272 flash { 274 flash@0 {
275 reg = <0x0>;
273 flash-timeout-us = <150000>; 276 flash-timeout-us = <150000>;
274 flash-max-microamp = <320000>; 277 flash-max-microamp = <320000>;
275 led-max-microamp = <60000>; 278 led-max-microamp = <60000>;
276 peak-current-limit = <1750000>; 279 ams,input-max-microamp = <1750000>;
277 }; 280 };
278 indicator { 281 indicator@1 {
282 reg = <0x1>;
279 led-max-microamp = <10000>; 283 led-max-microamp = <10000>;
280 }; 284 };
281 }; 285 };
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index bc4e92337d16..b46e54c2399b 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -401,7 +401,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
401/* Find an entry in the third-level page table. */ 401/* Find an entry in the third-level page table. */
402#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 402#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
403 403
404#define pte_offset_phys(dir,addr) (pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t)) 404#define pte_offset_phys(dir,addr) (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))
405#define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr)))) 405#define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr))))
406 406
407#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) 407#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 7434ec0c7a27..0b243ecaf7ac 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -384,6 +384,7 @@ ENTRY(kimage_vaddr)
384 * booted in EL1 or EL2 respectively. 384 * booted in EL1 or EL2 respectively.
385 */ 385 */
386ENTRY(el2_setup) 386ENTRY(el2_setup)
387 msr SPsel, #1 // We want to use SP_EL{1,2}
387 mrs x0, CurrentEL 388 mrs x0, CurrentEL
388 cmp x0, #CurrentEL_EL2 389 cmp x0, #CurrentEL_EL2
389 b.eq 1f 390 b.eq 1f
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 89993c4be1be..2069e9bc0fca 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -651,7 +651,7 @@ static const struct fault_info fault_info[] = {
651 { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" }, 651 { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" },
652 { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, 652 { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" },
653 { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, 653 { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
654 { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, 654 { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
655 { do_bad, SIGBUS, 0, "unknown 8" }, 655 { do_bad, SIGBUS, 0, "unknown 8" },
656 { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, 656 { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
657 { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, 657 { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 9d26abdf0dc1..4f798aa671dd 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -39,7 +39,7 @@ config MICROBLAZE
39# Endianness selection 39# Endianness selection
40choice 40choice
41 prompt "Endianness selection" 41 prompt "Endianness selection"
42 default CPU_BIG_ENDIAN 42 default CPU_LITTLE_ENDIAN
43 help 43 help
44 microblaze architectures can be configured for either little or 44 microblaze architectures can be configured for either little or
45 big endian formats. Be sure to select the appropriate mode. 45 big endian formats. Be sure to select the appropriate mode.
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index e77a596f3f1e..06609ca36115 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -7,6 +7,7 @@ generic-y += fcntl.h
7generic-y += ioctl.h 7generic-y += ioctl.h
8generic-y += ioctls.h 8generic-y += ioctls.h
9generic-y += ipcbuf.h 9generic-y += ipcbuf.h
10generic-y += kvm_para.h
10generic-y += mman.h 11generic-y += mman.h
11generic-y += msgbuf.h 12generic-y += msgbuf.h
12generic-y += param.h 13generic-y += param.h
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index e45ada8fb006..94700c5270a9 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -165,7 +165,7 @@ int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
165 unsigned long attrs) 165 unsigned long attrs)
166{ 166{
167#ifdef CONFIG_MMU 167#ifdef CONFIG_MMU
168 unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 168 unsigned long user_count = vma_pages(vma);
169 unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; 169 unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
170 unsigned long off = vma->vm_pgoff; 170 unsigned long off = vma->vm_pgoff;
171 unsigned long pfn; 171 unsigned long pfn;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 17936f82d3c7..ec69fa45d5a2 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1121,6 +1121,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1121BEGIN_FTR_SECTION 1121BEGIN_FTR_SECTION
1122 mtspr SPRN_PPR, r0 1122 mtspr SPRN_PPR, r0
1123END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 1123END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1124
1125/* Move canary into DSISR to check for later */
1126BEGIN_FTR_SECTION
1127 li r0, 0x7fff
1128 mtspr SPRN_HDSISR, r0
1129END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1130
1124 ld r0, VCPU_GPR(R0)(r4) 1131 ld r0, VCPU_GPR(R0)(r4)
1125 ld r4, VCPU_GPR(R4)(r4) 1132 ld r4, VCPU_GPR(R4)(r4)
1126 1133
@@ -1956,9 +1963,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
1956kvmppc_hdsi: 1963kvmppc_hdsi:
1957 ld r3, VCPU_KVM(r9) 1964 ld r3, VCPU_KVM(r9)
1958 lbz r0, KVM_RADIX(r3) 1965 lbz r0, KVM_RADIX(r3)
1959 cmpwi r0, 0
1960 mfspr r4, SPRN_HDAR 1966 mfspr r4, SPRN_HDAR
1961 mfspr r6, SPRN_HDSISR 1967 mfspr r6, SPRN_HDSISR
1968BEGIN_FTR_SECTION
1969 /* Look for DSISR canary. If we find it, retry instruction */
1970 cmpdi r6, 0x7fff
1971 beq 6f
1972END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1973 cmpwi r0, 0
1962 bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */ 1974 bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */
1963 /* HPTE not found fault or protection fault? */ 1975 /* HPTE not found fault or protection fault? */
1964 andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h 1976 andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 0b034ebbda2a..7f69d17de354 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -98,7 +98,7 @@ static struct clocksource timer_clocksource = {
98 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 98 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
99}; 99};
100 100
101static void __init timer_setup(void) 101static void __init um_timer_setup(void)
102{ 102{
103 int err; 103 int err;
104 104
@@ -132,5 +132,5 @@ void read_persistent_clock(struct timespec *ts)
132void __init time_init(void) 132void __init time_init(void)
133{ 133{
134 timer_set_signal_handler(); 134 timer_set_signal_handler();
135 late_time_init = timer_setup; 135 late_time_init = um_timer_setup;
136} 136}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 4cf100ff2a37..72db0664a53d 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -552,6 +552,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
552 552
553 X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), 553 X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates),
554 X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), 554 X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
555 X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
555 556
556 X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates), 557 X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates),
557 X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates), 558 X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates),
@@ -560,6 +561,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
560 X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), 561 X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
561 562
562 X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates), 563 X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
564 X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates),
565
566 X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates),
563 { }, 567 { },
564}; 568};
565MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); 569MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 8e2457cb6b4a..005908ee9333 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -775,6 +775,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
775 X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init), 775 X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init),
776 776
777 X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), 777 X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
778 X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
779
780 X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init),
778 {}, 781 {},
779}; 782};
780 783
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index db1fe377e6dd..a7196818416a 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3462,7 +3462,7 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
3462static struct intel_uncore_type skx_uncore_iio = { 3462static struct intel_uncore_type skx_uncore_iio = {
3463 .name = "iio", 3463 .name = "iio",
3464 .num_counters = 4, 3464 .num_counters = 4,
3465 .num_boxes = 5, 3465 .num_boxes = 6,
3466 .perf_ctr_bits = 48, 3466 .perf_ctr_bits = 48,
3467 .event_ctl = SKX_IIO0_MSR_PMON_CTL0, 3467 .event_ctl = SKX_IIO0_MSR_PMON_CTL0,
3468 .perf_ctr = SKX_IIO0_MSR_PMON_CTR0, 3468 .perf_ctr = SKX_IIO0_MSR_PMON_CTR0,
@@ -3492,7 +3492,7 @@ static const struct attribute_group skx_uncore_format_group = {
3492static struct intel_uncore_type skx_uncore_irp = { 3492static struct intel_uncore_type skx_uncore_irp = {
3493 .name = "irp", 3493 .name = "irp",
3494 .num_counters = 2, 3494 .num_counters = 2,
3495 .num_boxes = 5, 3495 .num_boxes = 6,
3496 .perf_ctr_bits = 48, 3496 .perf_ctr_bits = 48,
3497 .event_ctl = SKX_IRP0_MSR_PMON_CTL0, 3497 .event_ctl = SKX_IRP0_MSR_PMON_CTL0,
3498 .perf_ctr = SKX_IRP0_MSR_PMON_CTR0, 3498 .perf_ctr = SKX_IRP0_MSR_PMON_CTR0,
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 4bb3ec69e8ea..06723671ae4e 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -63,6 +63,14 @@ static bool test_intel(int idx)
63 case INTEL_FAM6_ATOM_SILVERMONT1: 63 case INTEL_FAM6_ATOM_SILVERMONT1:
64 case INTEL_FAM6_ATOM_SILVERMONT2: 64 case INTEL_FAM6_ATOM_SILVERMONT2:
65 case INTEL_FAM6_ATOM_AIRMONT: 65 case INTEL_FAM6_ATOM_AIRMONT:
66
67 case INTEL_FAM6_ATOM_GOLDMONT:
68 case INTEL_FAM6_ATOM_DENVERTON:
69
70 case INTEL_FAM6_ATOM_GEMINI_LAKE:
71
72 case INTEL_FAM6_XEON_PHI_KNL:
73 case INTEL_FAM6_XEON_PHI_KNM:
66 if (idx == PERF_MSR_SMI) 74 if (idx == PERF_MSR_SMI)
67 return true; 75 return true;
68 break; 76 break;
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index e0bb46c02857..0e2a5edbce00 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -231,7 +231,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
231 ksig->ka.sa.sa_restorer) 231 ksig->ka.sa.sa_restorer)
232 sp = (unsigned long) ksig->ka.sa.sa_restorer; 232 sp = (unsigned long) ksig->ka.sa.sa_restorer;
233 233
234 if (fpu->fpstate_active) { 234 if (fpu->initialized) {
235 unsigned long fx_aligned, math_size; 235 unsigned long fx_aligned, math_size;
236 236
237 sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); 237 sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index c1eadbaf1115..b0dc91f4bedc 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -11,10 +11,12 @@
11# define __ASM_FORM_COMMA(x) " " #x "," 11# define __ASM_FORM_COMMA(x) " " #x ","
12#endif 12#endif
13 13
14#ifdef CONFIG_X86_32 14#ifndef __x86_64__
15/* 32 bit */
15# define __ASM_SEL(a,b) __ASM_FORM(a) 16# define __ASM_SEL(a,b) __ASM_FORM(a)
16# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) 17# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
17#else 18#else
19/* 64 bit */
18# define __ASM_SEL(a,b) __ASM_FORM(b) 20# define __ASM_SEL(a,b) __ASM_FORM(b)
19# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) 21# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
20#endif 22#endif
@@ -139,8 +141,8 @@
139 * gets set up by the containing function. If you forget to do this, objtool 141 * gets set up by the containing function. If you forget to do this, objtool
140 * may print a "call without frame pointer save/setup" warning. 142 * may print a "call without frame pointer save/setup" warning.
141 */ 143 */
142register unsigned int __asm_call_sp asm("esp"); 144register unsigned long current_stack_pointer asm(_ASM_SP);
143#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp) 145#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
144#endif 146#endif
145 147
146#endif /* _ASM_X86_ASM_H */ 148#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 554cdb205d17..e3221ffa304e 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -23,11 +23,9 @@
23/* 23/*
24 * High level FPU state handling functions: 24 * High level FPU state handling functions:
25 */ 25 */
26extern void fpu__activate_curr(struct fpu *fpu); 26extern void fpu__initialize(struct fpu *fpu);
27extern void fpu__activate_fpstate_read(struct fpu *fpu); 27extern void fpu__prepare_read(struct fpu *fpu);
28extern void fpu__activate_fpstate_write(struct fpu *fpu); 28extern void fpu__prepare_write(struct fpu *fpu);
29extern void fpu__current_fpstate_write_begin(void);
30extern void fpu__current_fpstate_write_end(void);
31extern void fpu__save(struct fpu *fpu); 29extern void fpu__save(struct fpu *fpu);
32extern void fpu__restore(struct fpu *fpu); 30extern void fpu__restore(struct fpu *fpu);
33extern int fpu__restore_sig(void __user *buf, int ia32_frame); 31extern int fpu__restore_sig(void __user *buf, int ia32_frame);
@@ -120,20 +118,11 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
120 err; \ 118 err; \
121}) 119})
122 120
123#define check_insn(insn, output, input...) \ 121#define kernel_insn(insn, output, input...) \
124({ \
125 int err; \
126 asm volatile("1:" #insn "\n\t" \ 122 asm volatile("1:" #insn "\n\t" \
127 "2:\n" \ 123 "2:\n" \
128 ".section .fixup,\"ax\"\n" \ 124 _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \
129 "3: movl $-1,%[err]\n" \ 125 : output : input)
130 " jmp 2b\n" \
131 ".previous\n" \
132 _ASM_EXTABLE(1b, 3b) \
133 : [err] "=r" (err), output \
134 : "0"(0), input); \
135 err; \
136})
137 126
138static inline int copy_fregs_to_user(struct fregs_state __user *fx) 127static inline int copy_fregs_to_user(struct fregs_state __user *fx)
139{ 128{
@@ -153,20 +142,16 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
153 142
154static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) 143static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
155{ 144{
156 int err;
157
158 if (IS_ENABLED(CONFIG_X86_32)) { 145 if (IS_ENABLED(CONFIG_X86_32)) {
159 err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); 146 kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
160 } else { 147 } else {
161 if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) { 148 if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
162 err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); 149 kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
163 } else { 150 } else {
164 /* See comment in copy_fxregs_to_kernel() below. */ 151 /* See comment in copy_fxregs_to_kernel() below. */
165 err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); 152 kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
166 } 153 }
167 } 154 }
168 /* Copying from a kernel buffer to FPU registers should never fail: */
169 WARN_ON_FPU(err);
170} 155}
171 156
172static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) 157static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
@@ -183,9 +168,7 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
183 168
184static inline void copy_kernel_to_fregs(struct fregs_state *fx) 169static inline void copy_kernel_to_fregs(struct fregs_state *fx)
185{ 170{
186 int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); 171 kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
187
188 WARN_ON_FPU(err);
189} 172}
190 173
191static inline int copy_user_to_fregs(struct fregs_state __user *fx) 174static inline int copy_user_to_fregs(struct fregs_state __user *fx)
@@ -281,18 +264,13 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
281 * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact 264 * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
282 * XSAVE area format. 265 * XSAVE area format.
283 */ 266 */
284#define XSTATE_XRESTORE(st, lmask, hmask, err) \ 267#define XSTATE_XRESTORE(st, lmask, hmask) \
285 asm volatile(ALTERNATIVE(XRSTOR, \ 268 asm volatile(ALTERNATIVE(XRSTOR, \
286 XRSTORS, X86_FEATURE_XSAVES) \ 269 XRSTORS, X86_FEATURE_XSAVES) \
287 "\n" \ 270 "\n" \
288 "xor %[err], %[err]\n" \
289 "3:\n" \ 271 "3:\n" \
290 ".pushsection .fixup,\"ax\"\n" \ 272 _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
291 "4: movl $-2, %[err]\n" \ 273 : \
292 "jmp 3b\n" \
293 ".popsection\n" \
294 _ASM_EXTABLE(661b, 4b) \
295 : [err] "=r" (err) \
296 : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ 274 : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
297 : "memory") 275 : "memory")
298 276
@@ -336,7 +314,10 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
336 else 314 else
337 XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); 315 XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
338 316
339 /* We should never fault when copying from a kernel buffer: */ 317 /*
318 * We should never fault when copying from a kernel buffer, and the FPU
319 * state we set at boot time should be valid.
320 */
340 WARN_ON_FPU(err); 321 WARN_ON_FPU(err);
341} 322}
342 323
@@ -350,7 +331,7 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
350 u32 hmask = mask >> 32; 331 u32 hmask = mask >> 32;
351 int err; 332 int err;
352 333
353 WARN_ON(!alternatives_patched); 334 WARN_ON_FPU(!alternatives_patched);
354 335
355 XSTATE_XSAVE(xstate, lmask, hmask, err); 336 XSTATE_XSAVE(xstate, lmask, hmask, err);
356 337
@@ -365,12 +346,8 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
365{ 346{
366 u32 lmask = mask; 347 u32 lmask = mask;
367 u32 hmask = mask >> 32; 348 u32 hmask = mask >> 32;
368 int err;
369
370 XSTATE_XRESTORE(xstate, lmask, hmask, err);
371 349
372 /* We should never fault when copying from a kernel buffer: */ 350 XSTATE_XRESTORE(xstate, lmask, hmask);
373 WARN_ON_FPU(err);
374} 351}
375 352
376/* 353/*
@@ -526,38 +503,17 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
526 */ 503 */
527static inline void fpregs_deactivate(struct fpu *fpu) 504static inline void fpregs_deactivate(struct fpu *fpu)
528{ 505{
529 WARN_ON_FPU(!fpu->fpregs_active);
530
531 fpu->fpregs_active = 0;
532 this_cpu_write(fpu_fpregs_owner_ctx, NULL); 506 this_cpu_write(fpu_fpregs_owner_ctx, NULL);
533 trace_x86_fpu_regs_deactivated(fpu); 507 trace_x86_fpu_regs_deactivated(fpu);
534} 508}
535 509
536static inline void fpregs_activate(struct fpu *fpu) 510static inline void fpregs_activate(struct fpu *fpu)
537{ 511{
538 WARN_ON_FPU(fpu->fpregs_active);
539
540 fpu->fpregs_active = 1;
541 this_cpu_write(fpu_fpregs_owner_ctx, fpu); 512 this_cpu_write(fpu_fpregs_owner_ctx, fpu);
542 trace_x86_fpu_regs_activated(fpu); 513 trace_x86_fpu_regs_activated(fpu);
543} 514}
544 515
545/* 516/*
546 * The question "does this thread have fpu access?"
547 * is slightly racy, since preemption could come in
548 * and revoke it immediately after the test.
549 *
550 * However, even in that very unlikely scenario,
551 * we can just assume we have FPU access - typically
552 * to save the FP state - we'll just take a #NM
553 * fault and get the FPU access back.
554 */
555static inline int fpregs_active(void)
556{
557 return current->thread.fpu.fpregs_active;
558}
559
560/*
561 * FPU state switching for scheduling. 517 * FPU state switching for scheduling.
562 * 518 *
563 * This is a two-stage process: 519 * This is a two-stage process:
@@ -571,14 +527,13 @@ static inline int fpregs_active(void)
571static inline void 527static inline void
572switch_fpu_prepare(struct fpu *old_fpu, int cpu) 528switch_fpu_prepare(struct fpu *old_fpu, int cpu)
573{ 529{
574 if (old_fpu->fpregs_active) { 530 if (old_fpu->initialized) {
575 if (!copy_fpregs_to_fpstate(old_fpu)) 531 if (!copy_fpregs_to_fpstate(old_fpu))
576 old_fpu->last_cpu = -1; 532 old_fpu->last_cpu = -1;
577 else 533 else
578 old_fpu->last_cpu = cpu; 534 old_fpu->last_cpu = cpu;
579 535
580 /* But leave fpu_fpregs_owner_ctx! */ 536 /* But leave fpu_fpregs_owner_ctx! */
581 old_fpu->fpregs_active = 0;
582 trace_x86_fpu_regs_deactivated(old_fpu); 537 trace_x86_fpu_regs_deactivated(old_fpu);
583 } else 538 } else
584 old_fpu->last_cpu = -1; 539 old_fpu->last_cpu = -1;
@@ -595,7 +550,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
595static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) 550static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
596{ 551{
597 bool preload = static_cpu_has(X86_FEATURE_FPU) && 552 bool preload = static_cpu_has(X86_FEATURE_FPU) &&
598 new_fpu->fpstate_active; 553 new_fpu->initialized;
599 554
600 if (preload) { 555 if (preload) {
601 if (!fpregs_state_valid(new_fpu, cpu)) 556 if (!fpregs_state_valid(new_fpu, cpu))
@@ -617,8 +572,7 @@ static inline void user_fpu_begin(void)
617 struct fpu *fpu = &current->thread.fpu; 572 struct fpu *fpu = &current->thread.fpu;
618 573
619 preempt_disable(); 574 preempt_disable();
620 if (!fpregs_active()) 575 fpregs_activate(fpu);
621 fpregs_activate(fpu);
622 preempt_enable(); 576 preempt_enable();
623} 577}
624 578
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 3c80f5b9c09d..a1520575d86b 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -68,6 +68,9 @@ struct fxregs_state {
68/* Default value for fxregs_state.mxcsr: */ 68/* Default value for fxregs_state.mxcsr: */
69#define MXCSR_DEFAULT 0x1f80 69#define MXCSR_DEFAULT 0x1f80
70 70
71/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */
72#define MXCSR_AND_FLAGS_SIZE sizeof(u64)
73
71/* 74/*
72 * Software based FPU emulation state. This is arbitrary really, 75 * Software based FPU emulation state. This is arbitrary really,
73 * it matches the x87 format to make it easier to understand: 76 * it matches the x87 format to make it easier to understand:
@@ -290,36 +293,13 @@ struct fpu {
290 unsigned int last_cpu; 293 unsigned int last_cpu;
291 294
292 /* 295 /*
293 * @fpstate_active: 296 * @initialized:
294 * 297 *
295 * This flag indicates whether this context is active: if the task 298 * This flag indicates whether this context is initialized: if the task
296 * is not running then we can restore from this context, if the task 299 * is not running then we can restore from this context, if the task
297 * is running then we should save into this context. 300 * is running then we should save into this context.
298 */ 301 */
299 unsigned char fpstate_active; 302 unsigned char initialized;
300
301 /*
302 * @fpregs_active:
303 *
304 * This flag determines whether a given context is actively
305 * loaded into the FPU's registers and that those registers
306 * represent the task's current FPU state.
307 *
308 * Note the interaction with fpstate_active:
309 *
310 * # task does not use the FPU:
311 * fpstate_active == 0
312 *
313 * # task uses the FPU and regs are active:
314 * fpstate_active == 1 && fpregs_active == 1
315 *
316 * # the regs are inactive but still match fpstate:
317 * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
318 *
319 * The third state is what we use for the lazy restore optimization
320 * on lazy-switching CPUs.
321 */
322 unsigned char fpregs_active;
323 303
324 /* 304 /*
325 * @state: 305 * @state:
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 1b2799e0699a..83fee2469eb7 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,8 +48,12 @@ void fpu__xstate_clear_all_cpu_caps(void);
48void *get_xsave_addr(struct xregs_state *xsave, int xstate); 48void *get_xsave_addr(struct xregs_state *xsave, int xstate);
49const void *get_xsave_field_ptr(int xstate_field); 49const void *get_xsave_field_ptr(int xstate_field);
50int using_compacted_format(void); 50int using_compacted_format(void);
51int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, 51int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
52 void __user *ubuf, struct xregs_state *xsave); 52int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
53int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, 53int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
54 struct xregs_state *xsave); 54int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
55
56/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
57extern int validate_xstate_header(const struct xstate_header *hdr);
58
55#endif 59#endif
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 5161da1a0fa0..89e7eeb5cec1 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -158,17 +158,6 @@ struct thread_info {
158 */ 158 */
159#ifndef __ASSEMBLY__ 159#ifndef __ASSEMBLY__
160 160
161static inline unsigned long current_stack_pointer(void)
162{
163 unsigned long sp;
164#ifdef CONFIG_X86_64
165 asm("mov %%rsp,%0" : "=g" (sp));
166#else
167 asm("mov %%esp,%0" : "=g" (sp));
168#endif
169 return sp;
170}
171
172/* 161/*
173 * Walks up the stack frames to make sure that the specified object is 162 * Walks up the stack frames to make sure that the specified object is
174 * entirely contained by a single stack frame. 163 * entirely contained by a single stack frame.
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 342e59789fcd..39f7a27bef13 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -12,25 +12,22 @@ DECLARE_EVENT_CLASS(x86_fpu,
12 12
13 TP_STRUCT__entry( 13 TP_STRUCT__entry(
14 __field(struct fpu *, fpu) 14 __field(struct fpu *, fpu)
15 __field(bool, fpregs_active) 15 __field(bool, initialized)
16 __field(bool, fpstate_active)
17 __field(u64, xfeatures) 16 __field(u64, xfeatures)
18 __field(u64, xcomp_bv) 17 __field(u64, xcomp_bv)
19 ), 18 ),
20 19
21 TP_fast_assign( 20 TP_fast_assign(
22 __entry->fpu = fpu; 21 __entry->fpu = fpu;
23 __entry->fpregs_active = fpu->fpregs_active; 22 __entry->initialized = fpu->initialized;
24 __entry->fpstate_active = fpu->fpstate_active;
25 if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { 23 if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
26 __entry->xfeatures = fpu->state.xsave.header.xfeatures; 24 __entry->xfeatures = fpu->state.xsave.header.xfeatures;
27 __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; 25 __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
28 } 26 }
29 ), 27 ),
30 TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx", 28 TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
31 __entry->fpu, 29 __entry->fpu,
32 __entry->fpregs_active, 30 __entry->initialized,
33 __entry->fpstate_active,
34 __entry->xfeatures, 31 __entry->xfeatures,
35 __entry->xcomp_bv 32 __entry->xcomp_bv
36 ) 33 )
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 78e8fcc87d4c..4b892917edeb 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -337,7 +337,7 @@ do { \
337 _ASM_EXTABLE(1b, 4b) \ 337 _ASM_EXTABLE(1b, 4b) \
338 _ASM_EXTABLE(2b, 4b) \ 338 _ASM_EXTABLE(2b, 4b) \
339 : "=r" (retval), "=&A"(x) \ 339 : "=r" (retval), "=&A"(x) \
340 : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1), \ 340 : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \
341 "i" (errret), "0" (retval)); \ 341 "i" (errret), "0" (retval)); \
342}) 342})
343 343
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 128a1a0b1450..7cb282e9e587 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -551,13 +551,13 @@ static inline void
551MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, 551MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
552 struct desc_struct desc) 552 struct desc_struct desc)
553{ 553{
554 u32 *p = (u32 *) &desc;
555
556 mcl->op = __HYPERVISOR_update_descriptor; 554 mcl->op = __HYPERVISOR_update_descriptor;
557 if (sizeof(maddr) == sizeof(long)) { 555 if (sizeof(maddr) == sizeof(long)) {
558 mcl->args[0] = maddr; 556 mcl->args[0] = maddr;
559 mcl->args[1] = *(unsigned long *)&desc; 557 mcl->args[1] = *(unsigned long *)&desc;
560 } else { 558 } else {
559 u32 *p = (u32 *)&desc;
560
561 mcl->args[0] = maddr; 561 mcl->args[0] = maddr;
562 mcl->args[1] = maddr >> 32; 562 mcl->args[1] = maddr >> 32;
563 mcl->args[2] = *p++; 563 mcl->args[2] = *p++;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index e1114f070c2d..f92a6593de1e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -100,7 +100,7 @@ void __kernel_fpu_begin(void)
100 100
101 kernel_fpu_disable(); 101 kernel_fpu_disable();
102 102
103 if (fpu->fpregs_active) { 103 if (fpu->initialized) {
104 /* 104 /*
105 * Ignore return value -- we don't care if reg state 105 * Ignore return value -- we don't care if reg state
106 * is clobbered. 106 * is clobbered.
@@ -116,7 +116,7 @@ void __kernel_fpu_end(void)
116{ 116{
117 struct fpu *fpu = &current->thread.fpu; 117 struct fpu *fpu = &current->thread.fpu;
118 118
119 if (fpu->fpregs_active) 119 if (fpu->initialized)
120 copy_kernel_to_fpregs(&fpu->state); 120 copy_kernel_to_fpregs(&fpu->state);
121 121
122 kernel_fpu_enable(); 122 kernel_fpu_enable();
@@ -148,7 +148,7 @@ void fpu__save(struct fpu *fpu)
148 148
149 preempt_disable(); 149 preempt_disable();
150 trace_x86_fpu_before_save(fpu); 150 trace_x86_fpu_before_save(fpu);
151 if (fpu->fpregs_active) { 151 if (fpu->initialized) {
152 if (!copy_fpregs_to_fpstate(fpu)) { 152 if (!copy_fpregs_to_fpstate(fpu)) {
153 copy_kernel_to_fpregs(&fpu->state); 153 copy_kernel_to_fpregs(&fpu->state);
154 } 154 }
@@ -189,10 +189,9 @@ EXPORT_SYMBOL_GPL(fpstate_init);
189 189
190int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) 190int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
191{ 191{
192 dst_fpu->fpregs_active = 0;
193 dst_fpu->last_cpu = -1; 192 dst_fpu->last_cpu = -1;
194 193
195 if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU)) 194 if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
196 return 0; 195 return 0;
197 196
198 WARN_ON_FPU(src_fpu != &current->thread.fpu); 197 WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -206,26 +205,14 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
206 /* 205 /*
207 * Save current FPU registers directly into the child 206 * Save current FPU registers directly into the child
208 * FPU context, without any memory-to-memory copying. 207 * FPU context, without any memory-to-memory copying.
209 * In lazy mode, if the FPU context isn't loaded into
210 * fpregs, CR0.TS will be set and do_device_not_available
211 * will load the FPU context.
212 * 208 *
213 * We have to do all this with preemption disabled, 209 * ( The function 'fails' in the FNSAVE case, which destroys
214 * mostly because of the FNSAVE case, because in that 210 * register contents so we have to copy them back. )
215 * case we must not allow preemption in the window
216 * between the FNSAVE and us marking the context lazy.
217 *
218 * It shouldn't be an issue as even FNSAVE is plenty
219 * fast in terms of critical section length.
220 */ 211 */
221 preempt_disable();
222 if (!copy_fpregs_to_fpstate(dst_fpu)) { 212 if (!copy_fpregs_to_fpstate(dst_fpu)) {
223 memcpy(&src_fpu->state, &dst_fpu->state, 213 memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
224 fpu_kernel_xstate_size);
225
226 copy_kernel_to_fpregs(&src_fpu->state); 214 copy_kernel_to_fpregs(&src_fpu->state);
227 } 215 }
228 preempt_enable();
229 216
230 trace_x86_fpu_copy_src(src_fpu); 217 trace_x86_fpu_copy_src(src_fpu);
231 trace_x86_fpu_copy_dst(dst_fpu); 218 trace_x86_fpu_copy_dst(dst_fpu);
@@ -237,45 +224,48 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
237 * Activate the current task's in-memory FPU context, 224 * Activate the current task's in-memory FPU context,
238 * if it has not been used before: 225 * if it has not been used before:
239 */ 226 */
240void fpu__activate_curr(struct fpu *fpu) 227void fpu__initialize(struct fpu *fpu)
241{ 228{
242 WARN_ON_FPU(fpu != &current->thread.fpu); 229 WARN_ON_FPU(fpu != &current->thread.fpu);
243 230
244 if (!fpu->fpstate_active) { 231 if (!fpu->initialized) {
245 fpstate_init(&fpu->state); 232 fpstate_init(&fpu->state);
246 trace_x86_fpu_init_state(fpu); 233 trace_x86_fpu_init_state(fpu);
247 234
248 trace_x86_fpu_activate_state(fpu); 235 trace_x86_fpu_activate_state(fpu);
249 /* Safe to do for the current task: */ 236 /* Safe to do for the current task: */
250 fpu->fpstate_active = 1; 237 fpu->initialized = 1;
251 } 238 }
252} 239}
253EXPORT_SYMBOL_GPL(fpu__activate_curr); 240EXPORT_SYMBOL_GPL(fpu__initialize);
254 241
255/* 242/*
256 * This function must be called before we read a task's fpstate. 243 * This function must be called before we read a task's fpstate.
257 * 244 *
258 * If the task has not used the FPU before then initialize its 245 * There's two cases where this gets called:
259 * fpstate. 246 *
247 * - for the current task (when coredumping), in which case we have
248 * to save the latest FPU registers into the fpstate,
249 *
250 * - or it's called for stopped tasks (ptrace), in which case the
251 * registers were already saved by the context-switch code when
252 * the task scheduled out - we only have to initialize the registers
253 * if they've never been initialized.
260 * 254 *
261 * If the task has used the FPU before then save it. 255 * If the task has used the FPU before then save it.
262 */ 256 */
263void fpu__activate_fpstate_read(struct fpu *fpu) 257void fpu__prepare_read(struct fpu *fpu)
264{ 258{
265 /* 259 if (fpu == &current->thread.fpu) {
266 * If fpregs are active (in the current CPU), then
267 * copy them to the fpstate:
268 */
269 if (fpu->fpregs_active) {
270 fpu__save(fpu); 260 fpu__save(fpu);
271 } else { 261 } else {
272 if (!fpu->fpstate_active) { 262 if (!fpu->initialized) {
273 fpstate_init(&fpu->state); 263 fpstate_init(&fpu->state);
274 trace_x86_fpu_init_state(fpu); 264 trace_x86_fpu_init_state(fpu);
275 265
276 trace_x86_fpu_activate_state(fpu); 266 trace_x86_fpu_activate_state(fpu);
277 /* Safe to do for current and for stopped child tasks: */ 267 /* Safe to do for current and for stopped child tasks: */
278 fpu->fpstate_active = 1; 268 fpu->initialized = 1;
279 } 269 }
280 } 270 }
281} 271}
@@ -283,17 +273,17 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
283/* 273/*
284 * This function must be called before we write a task's fpstate. 274 * This function must be called before we write a task's fpstate.
285 * 275 *
286 * If the task has used the FPU before then unlazy it. 276 * If the task has used the FPU before then invalidate any cached FPU registers.
287 * If the task has not used the FPU before then initialize its fpstate. 277 * If the task has not used the FPU before then initialize its fpstate.
288 * 278 *
289 * After this function call, after registers in the fpstate are 279 * After this function call, after registers in the fpstate are
290 * modified and the child task has woken up, the child task will 280 * modified and the child task has woken up, the child task will
291 * restore the modified FPU state from the modified context. If we 281 * restore the modified FPU state from the modified context. If we
292 * didn't clear its lazy status here then the lazy in-registers 282 * didn't clear its cached status here then the cached in-registers
293 * state pending on its former CPU could be restored, corrupting 283 * state pending on its former CPU could be restored, corrupting
294 * the modifications. 284 * the modifications.
295 */ 285 */
296void fpu__activate_fpstate_write(struct fpu *fpu) 286void fpu__prepare_write(struct fpu *fpu)
297{ 287{
298 /* 288 /*
299 * Only stopped child tasks can be used to modify the FPU 289 * Only stopped child tasks can be used to modify the FPU
@@ -301,8 +291,8 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
301 */ 291 */
302 WARN_ON_FPU(fpu == &current->thread.fpu); 292 WARN_ON_FPU(fpu == &current->thread.fpu);
303 293
304 if (fpu->fpstate_active) { 294 if (fpu->initialized) {
305 /* Invalidate any lazy state: */ 295 /* Invalidate any cached state: */
306 __fpu_invalidate_fpregs_state(fpu); 296 __fpu_invalidate_fpregs_state(fpu);
307 } else { 297 } else {
308 fpstate_init(&fpu->state); 298 fpstate_init(&fpu->state);
@@ -310,74 +300,11 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
310 300
311 trace_x86_fpu_activate_state(fpu); 301 trace_x86_fpu_activate_state(fpu);
312 /* Safe to do for stopped child tasks: */ 302 /* Safe to do for stopped child tasks: */
313 fpu->fpstate_active = 1; 303 fpu->initialized = 1;
314 } 304 }
315} 305}
316 306
317/* 307/*
318 * This function must be called before we write the current
319 * task's fpstate.
320 *
321 * This call gets the current FPU register state and moves
322 * it in to the 'fpstate'. Preemption is disabled so that
323 * no writes to the 'fpstate' can occur from context
324 * swiches.
325 *
326 * Must be followed by a fpu__current_fpstate_write_end().
327 */
328void fpu__current_fpstate_write_begin(void)
329{
330 struct fpu *fpu = &current->thread.fpu;
331
332 /*
333 * Ensure that the context-switching code does not write
334 * over the fpstate while we are doing our update.
335 */
336 preempt_disable();
337
338 /*
339 * Move the fpregs in to the fpu's 'fpstate'.
340 */
341 fpu__activate_fpstate_read(fpu);
342
343 /*
344 * The caller is about to write to 'fpu'. Ensure that no
345 * CPU thinks that its fpregs match the fpstate. This
346 * ensures we will not be lazy and skip a XRSTOR in the
347 * future.
348 */
349 __fpu_invalidate_fpregs_state(fpu);
350}
351
352/*
353 * This function must be paired with fpu__current_fpstate_write_begin()
354 *
355 * This will ensure that the modified fpstate gets placed back in
356 * the fpregs if necessary.
357 *
358 * Note: This function may be called whether or not an _actual_
359 * write to the fpstate occurred.
360 */
361void fpu__current_fpstate_write_end(void)
362{
363 struct fpu *fpu = &current->thread.fpu;
364
365 /*
366 * 'fpu' now has an updated copy of the state, but the
367 * registers may still be out of date. Update them with
368 * an XRSTOR if they are active.
369 */
370 if (fpregs_active())
371 copy_kernel_to_fpregs(&fpu->state);
372
373 /*
374 * Our update is done and the fpregs/fpstate are in sync
375 * if necessary. Context switches can happen again.
376 */
377 preempt_enable();
378}
379
380/*
381 * 'fpu__restore()' is called to copy FPU registers from 308 * 'fpu__restore()' is called to copy FPU registers from
382 * the FPU fpstate to the live hw registers and to activate 309 * the FPU fpstate to the live hw registers and to activate
383 * access to the hardware registers, so that FPU instructions 310 * access to the hardware registers, so that FPU instructions
@@ -389,7 +316,7 @@ void fpu__current_fpstate_write_end(void)
389 */ 316 */
390void fpu__restore(struct fpu *fpu) 317void fpu__restore(struct fpu *fpu)
391{ 318{
392 fpu__activate_curr(fpu); 319 fpu__initialize(fpu);
393 320
394 /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ 321 /* Avoid __kernel_fpu_begin() right after fpregs_activate() */
395 kernel_fpu_disable(); 322 kernel_fpu_disable();
@@ -414,15 +341,17 @@ void fpu__drop(struct fpu *fpu)
414{ 341{
415 preempt_disable(); 342 preempt_disable();
416 343
417 if (fpu->fpregs_active) { 344 if (fpu == &current->thread.fpu) {
418 /* Ignore delayed exceptions from user space */ 345 if (fpu->initialized) {
419 asm volatile("1: fwait\n" 346 /* Ignore delayed exceptions from user space */
420 "2:\n" 347 asm volatile("1: fwait\n"
421 _ASM_EXTABLE(1b, 2b)); 348 "2:\n"
422 fpregs_deactivate(fpu); 349 _ASM_EXTABLE(1b, 2b));
350 fpregs_deactivate(fpu);
351 }
423 } 352 }
424 353
425 fpu->fpstate_active = 0; 354 fpu->initialized = 0;
426 355
427 trace_x86_fpu_dropped(fpu); 356 trace_x86_fpu_dropped(fpu);
428 357
@@ -462,9 +391,11 @@ void fpu__clear(struct fpu *fpu)
462 * Make sure fpstate is cleared and initialized. 391 * Make sure fpstate is cleared and initialized.
463 */ 392 */
464 if (static_cpu_has(X86_FEATURE_FPU)) { 393 if (static_cpu_has(X86_FEATURE_FPU)) {
465 fpu__activate_curr(fpu); 394 preempt_disable();
395 fpu__initialize(fpu);
466 user_fpu_begin(); 396 user_fpu_begin();
467 copy_init_fpstate_to_fpregs(); 397 copy_init_fpstate_to_fpregs();
398 preempt_enable();
468 } 399 }
469} 400}
470 401
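The fpu__prepare_write() hunk above keeps the same invariant as the old fpu__activate_fpstate_write(): before a ptracer edits a stopped child's in-memory fpstate, any cached register ownership must be invalidated so stale registers are never restored over the new contents, and an uninitialized fpstate is initialized first. A minimal user-space sketch of that invariant, using toy types and names (fpregs_cached_owner, prepare_write) rather than the kernel's API, might look like:

#include <stdio.h>
#include <string.h>

struct fpstate { unsigned char regs[64]; };
struct fpu { struct fpstate state; int initialized; };

/* Toy stand-in for the per-CPU "whose registers are live" cache. */
static struct fpu *fpregs_cached_owner;

/* Rough model of __fpu_invalidate_fpregs_state(): forget cached ownership. */
static void invalidate_cached_fpregs(struct fpu *fpu)
{
	if (fpregs_cached_owner == fpu)
		fpregs_cached_owner = NULL;
}

/* Rough model of fpu__prepare_write(): make editing fpu->state safe. */
static void prepare_write(struct fpu *fpu)
{
	if (fpu->initialized) {
		invalidate_cached_fpregs(fpu);
	} else {
		memset(&fpu->state, 0, sizeof(fpu->state)); /* stands in for fpstate_init() */
		fpu->initialized = 1;
	}
}

int main(void)
{
	struct fpu child = { .initialized = 1 };

	fpregs_cached_owner = &child;   /* child's registers are "live" somewhere */
	prepare_write(&child);          /* tracer is about to edit child.state   */
	child.state.regs[0] = 0xab;

	printf("cache invalidated: %s\n",
	       fpregs_cached_owner == NULL ? "yes" : "no");
	return 0;
}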
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index d5d44c452624..7affb7e3d9a5 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -240,7 +240,7 @@ static void __init fpu__init_system_ctx_switch(void)
240 WARN_ON_FPU(!on_boot_cpu); 240 WARN_ON_FPU(!on_boot_cpu);
241 on_boot_cpu = 0; 241 on_boot_cpu = 0;
242 242
243 WARN_ON_FPU(current->thread.fpu.fpstate_active); 243 WARN_ON_FPU(current->thread.fpu.initialized);
244} 244}
245 245
246/* 246/*
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index b188b16841e3..3ea151372389 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -16,14 +16,14 @@ int regset_fpregs_active(struct task_struct *target, const struct user_regset *r
16{ 16{
17 struct fpu *target_fpu = &target->thread.fpu; 17 struct fpu *target_fpu = &target->thread.fpu;
18 18
19 return target_fpu->fpstate_active ? regset->n : 0; 19 return target_fpu->initialized ? regset->n : 0;
20} 20}
21 21
22int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) 22int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
23{ 23{
24 struct fpu *target_fpu = &target->thread.fpu; 24 struct fpu *target_fpu = &target->thread.fpu;
25 25
26 if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active) 26 if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized)
27 return regset->n; 27 return regset->n;
28 else 28 else
29 return 0; 29 return 0;
@@ -38,7 +38,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
38 if (!boot_cpu_has(X86_FEATURE_FXSR)) 38 if (!boot_cpu_has(X86_FEATURE_FXSR))
39 return -ENODEV; 39 return -ENODEV;
40 40
41 fpu__activate_fpstate_read(fpu); 41 fpu__prepare_read(fpu);
42 fpstate_sanitize_xstate(fpu); 42 fpstate_sanitize_xstate(fpu);
43 43
44 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 44 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
@@ -55,7 +55,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
55 if (!boot_cpu_has(X86_FEATURE_FXSR)) 55 if (!boot_cpu_has(X86_FEATURE_FXSR))
56 return -ENODEV; 56 return -ENODEV;
57 57
58 fpu__activate_fpstate_write(fpu); 58 fpu__prepare_write(fpu);
59 fpstate_sanitize_xstate(fpu); 59 fpstate_sanitize_xstate(fpu);
60 60
61 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, 61 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
@@ -89,10 +89,13 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
89 89
90 xsave = &fpu->state.xsave; 90 xsave = &fpu->state.xsave;
91 91
92 fpu__activate_fpstate_read(fpu); 92 fpu__prepare_read(fpu);
93 93
94 if (using_compacted_format()) { 94 if (using_compacted_format()) {
95 ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave); 95 if (kbuf)
96 ret = copy_xstate_to_kernel(kbuf, xsave, pos, count);
97 else
98 ret = copy_xstate_to_user(ubuf, xsave, pos, count);
96 } else { 99 } else {
97 fpstate_sanitize_xstate(fpu); 100 fpstate_sanitize_xstate(fpu);
98 /* 101 /*
@@ -129,28 +132,29 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
129 132
130 xsave = &fpu->state.xsave; 133 xsave = &fpu->state.xsave;
131 134
132 fpu__activate_fpstate_write(fpu); 135 fpu__prepare_write(fpu);
133 136
134 if (boot_cpu_has(X86_FEATURE_XSAVES)) 137 if (using_compacted_format()) {
135 ret = copyin_to_xsaves(kbuf, ubuf, xsave); 138 if (kbuf)
136 else 139 ret = copy_kernel_to_xstate(xsave, kbuf);
140 else
141 ret = copy_user_to_xstate(xsave, ubuf);
142 } else {
137 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); 143 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
138 144 if (!ret)
139 /* 145 ret = validate_xstate_header(&xsave->header);
140 * In case of failure, mark all states as init: 146 }
141 */
142 if (ret)
143 fpstate_init(&fpu->state);
144 147
145 /* 148 /*
146 * mxcsr reserved bits must be masked to zero for security reasons. 149 * mxcsr reserved bits must be masked to zero for security reasons.
147 */ 150 */
148 xsave->i387.mxcsr &= mxcsr_feature_mask; 151 xsave->i387.mxcsr &= mxcsr_feature_mask;
149 xsave->header.xfeatures &= xfeatures_mask; 152
150 /* 153 /*
151 * These bits must be zero. 154 * In case of failure, mark all states as init:
152 */ 155 */
153 memset(&xsave->header.reserved, 0, 48); 156 if (ret)
157 fpstate_init(&fpu->state);
154 158
155 return ret; 159 return ret;
156} 160}
@@ -299,7 +303,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
299 struct fpu *fpu = &target->thread.fpu; 303 struct fpu *fpu = &target->thread.fpu;
300 struct user_i387_ia32_struct env; 304 struct user_i387_ia32_struct env;
301 305
302 fpu__activate_fpstate_read(fpu); 306 fpu__prepare_read(fpu);
303 307
304 if (!boot_cpu_has(X86_FEATURE_FPU)) 308 if (!boot_cpu_has(X86_FEATURE_FPU))
305 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); 309 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
@@ -329,7 +333,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
329 struct user_i387_ia32_struct env; 333 struct user_i387_ia32_struct env;
330 int ret; 334 int ret;
331 335
332 fpu__activate_fpstate_write(fpu); 336 fpu__prepare_write(fpu);
333 fpstate_sanitize_xstate(fpu); 337 fpstate_sanitize_xstate(fpu);
334 338
335 if (!boot_cpu_has(X86_FEATURE_FPU)) 339 if (!boot_cpu_has(X86_FEATURE_FPU))
@@ -369,7 +373,7 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
369 struct fpu *fpu = &tsk->thread.fpu; 373 struct fpu *fpu = &tsk->thread.fpu;
370 int fpvalid; 374 int fpvalid;
371 375
372 fpvalid = fpu->fpstate_active; 376 fpvalid = fpu->initialized;
373 if (fpvalid) 377 if (fpvalid)
374 fpvalid = !fpregs_get(tsk, NULL, 378 fpvalid = !fpregs_get(tsk, NULL,
375 0, sizeof(struct user_i387_ia32_struct), 379 0, sizeof(struct user_i387_ia32_struct),
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 83c23c230b4c..fb639e70048f 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -155,7 +155,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
155 */ 155 */
156int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) 156int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
157{ 157{
158 struct xregs_state *xsave = &current->thread.fpu.state.xsave; 158 struct fpu *fpu = &current->thread.fpu;
159 struct xregs_state *xsave = &fpu->state.xsave;
159 struct task_struct *tsk = current; 160 struct task_struct *tsk = current;
160 int ia32_fxstate = (buf != buf_fx); 161 int ia32_fxstate = (buf != buf_fx);
161 162
@@ -170,13 +171,13 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
170 sizeof(struct user_i387_ia32_struct), NULL, 171 sizeof(struct user_i387_ia32_struct), NULL,
171 (struct _fpstate_32 __user *) buf) ? -1 : 1; 172 (struct _fpstate_32 __user *) buf) ? -1 : 1;
172 173
173 if (fpregs_active() || using_compacted_format()) { 174 if (fpu->initialized || using_compacted_format()) {
174 /* Save the live register state to the user directly. */ 175 /* Save the live register state to the user directly. */
175 if (copy_fpregs_to_sigframe(buf_fx)) 176 if (copy_fpregs_to_sigframe(buf_fx))
176 return -1; 177 return -1;
177 /* Update the thread's fxstate to save the fsave header. */ 178 /* Update the thread's fxstate to save the fsave header. */
178 if (ia32_fxstate) 179 if (ia32_fxstate)
179 copy_fxregs_to_kernel(&tsk->thread.fpu); 180 copy_fxregs_to_kernel(fpu);
180 } else { 181 } else {
181 /* 182 /*
182 * It is a *bug* if kernel uses compacted-format for xsave 183 * It is a *bug* if kernel uses compacted-format for xsave
@@ -189,7 +190,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
189 return -1; 190 return -1;
190 } 191 }
191 192
192 fpstate_sanitize_xstate(&tsk->thread.fpu); 193 fpstate_sanitize_xstate(fpu);
193 if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) 194 if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
194 return -1; 195 return -1;
195 } 196 }
@@ -213,8 +214,11 @@ sanitize_restored_xstate(struct task_struct *tsk,
213 struct xstate_header *header = &xsave->header; 214 struct xstate_header *header = &xsave->header;
214 215
215 if (use_xsave()) { 216 if (use_xsave()) {
216 /* These bits must be zero. */ 217 /*
217 memset(header->reserved, 0, 48); 218 * Note: we don't need to zero the reserved bits in the
219 * xstate_header here because we either didn't copy them at all,
220 * or we checked earlier that they aren't set.
221 */
218 222
219 /* 223 /*
220 * Init the state that is not present in the memory 224 * Init the state that is not present in the memory
@@ -223,7 +227,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
223 if (fx_only) 227 if (fx_only)
224 header->xfeatures = XFEATURE_MASK_FPSSE; 228 header->xfeatures = XFEATURE_MASK_FPSSE;
225 else 229 else
226 header->xfeatures &= (xfeatures_mask & xfeatures); 230 header->xfeatures &= xfeatures;
227 } 231 }
228 232
229 if (use_fxsr()) { 233 if (use_fxsr()) {
@@ -279,7 +283,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
279 if (!access_ok(VERIFY_READ, buf, size)) 283 if (!access_ok(VERIFY_READ, buf, size))
280 return -EACCES; 284 return -EACCES;
281 285
282 fpu__activate_curr(fpu); 286 fpu__initialize(fpu);
283 287
284 if (!static_cpu_has(X86_FEATURE_FPU)) 288 if (!static_cpu_has(X86_FEATURE_FPU))
285 return fpregs_soft_set(current, NULL, 289 return fpregs_soft_set(current, NULL,
@@ -307,28 +311,29 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
307 /* 311 /*
308 * For 32-bit frames with fxstate, copy the user state to the 312 * For 32-bit frames with fxstate, copy the user state to the
309 * thread's fpu state, reconstruct fxstate from the fsave 313 * thread's fpu state, reconstruct fxstate from the fsave
310 * header. Sanitize the copied state etc. 314 * header. Validate and sanitize the copied state.
311 */ 315 */
312 struct fpu *fpu = &tsk->thread.fpu; 316 struct fpu *fpu = &tsk->thread.fpu;
313 struct user_i387_ia32_struct env; 317 struct user_i387_ia32_struct env;
314 int err = 0; 318 int err = 0;
315 319
316 /* 320 /*
317 * Drop the current fpu which clears fpu->fpstate_active. This ensures 321 * Drop the current fpu which clears fpu->initialized. This ensures
 318 * that any context-switch during the copy of the new state 322 * that any context-switch during the copy of the new state
 319 * does not save or restore the intermediate state, 323 * does not save or restore the intermediate state,
 320 * which would corrupt the newly restored state. 324 * which would corrupt the newly restored state.
321 * We will be ready to restore/save the state only after 325 * We will be ready to restore/save the state only after
322 * fpu->fpstate_active is again set. 326 * fpu->initialized is again set.
323 */ 327 */
324 fpu__drop(fpu); 328 fpu__drop(fpu);
325 329
326 if (using_compacted_format()) { 330 if (using_compacted_format()) {
327 err = copyin_to_xsaves(NULL, buf_fx, 331 err = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
328 &fpu->state.xsave);
329 } else { 332 } else {
330 err = __copy_from_user(&fpu->state.xsave, 333 err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
331 buf_fx, state_size); 334
335 if (!err && state_size > offsetof(struct xregs_state, header))
336 err = validate_xstate_header(&fpu->state.xsave.header);
332 } 337 }
333 338
334 if (err || __copy_from_user(&env, buf, sizeof(env))) { 339 if (err || __copy_from_user(&env, buf, sizeof(env))) {
@@ -339,7 +344,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
339 sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); 344 sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
340 } 345 }
341 346
342 fpu->fpstate_active = 1; 347 fpu->initialized = 1;
343 preempt_disable(); 348 preempt_disable();
344 fpu__restore(fpu); 349 fpu__restore(fpu);
345 preempt_enable(); 350 preempt_enable();
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c24ac1efb12d..f1d5476c9022 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -483,6 +483,30 @@ int using_compacted_format(void)
483 return boot_cpu_has(X86_FEATURE_XSAVES); 483 return boot_cpu_has(X86_FEATURE_XSAVES);
484} 484}
485 485
486/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
487int validate_xstate_header(const struct xstate_header *hdr)
488{
489 /* No unknown or supervisor features may be set */
490 if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
491 return -EINVAL;
492
493 /* Userspace must use the uncompacted format */
494 if (hdr->xcomp_bv)
495 return -EINVAL;
496
497 /*
498 * If 'reserved' is shrunken to add a new field, make sure to validate
499 * that new field here!
500 */
501 BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
502
503 /* No reserved bits may be set */
504 if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
505 return -EINVAL;
506
507 return 0;
508}
509
486static void __xstate_dump_leaves(void) 510static void __xstate_dump_leaves(void)
487{ 511{
488 int i; 512 int i;
@@ -867,7 +891,7 @@ const void *get_xsave_field_ptr(int xsave_state)
867{ 891{
868 struct fpu *fpu = &current->thread.fpu; 892 struct fpu *fpu = &current->thread.fpu;
869 893
870 if (!fpu->fpstate_active) 894 if (!fpu->initialized)
871 return NULL; 895 return NULL;
872 /* 896 /*
873 * fpu__save() takes the CPU's xstate registers 897 * fpu__save() takes the CPU's xstate registers
@@ -921,38 +945,129 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
921#endif /* ! CONFIG_ARCH_HAS_PKEYS */ 945#endif /* ! CONFIG_ARCH_HAS_PKEYS */
922 946
923/* 947/*
948 * Weird legacy quirk: SSE and YMM states store information in the
949 * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP
950 * area is marked as unused in the xfeatures header, we need to copy
951 * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use.
952 */
953static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
954{
955 if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM)))
956 return false;
957
958 if (xfeatures & XFEATURE_MASK_FP)
959 return false;
960
961 return true;
962}
963
964/*
924 * This is similar to user_regset_copyout(), but will not add offset to 965 * This is similar to user_regset_copyout(), but will not add offset to
925 * the source data pointer or increment pos, count, kbuf, and ubuf. 966 * the source data pointer or increment pos, count, kbuf, and ubuf.
926 */ 967 */
927static inline int xstate_copyout(unsigned int pos, unsigned int count, 968static inline void
928 void *kbuf, void __user *ubuf, 969__copy_xstate_to_kernel(void *kbuf, const void *data,
929 const void *data, const int start_pos, 970 unsigned int offset, unsigned int size, unsigned int size_total)
930 const int end_pos)
931{ 971{
932 if ((count == 0) || (pos < start_pos)) 972 if (offset < size_total) {
933 return 0; 973 unsigned int copy = min(size, size_total - offset);
934 974
935 if (end_pos < 0 || pos < end_pos) { 975 memcpy(kbuf + offset, data, copy);
936 unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos)); 976 }
977}
937 978
938 if (kbuf) { 979/*
939 memcpy(kbuf + pos, data, copy); 980 * Convert from kernel XSAVES compacted format to standard format and copy
940 } else { 981 * to a kernel-space ptrace buffer.
941 if (__copy_to_user(ubuf + pos, data, copy)) 982 *
942 return -EFAULT; 983 * It supports partial copy but pos always starts from zero. This is called
984 * from xstateregs_get() and there we check the CPU has XSAVES.
985 */
986int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
987{
988 unsigned int offset, size;
989 struct xstate_header header;
990 int i;
991
992 /*
993 * Currently copy_regset_to_user() starts from pos 0:
994 */
995 if (unlikely(offset_start != 0))
996 return -EFAULT;
997
998 /*
999 * The destination is a ptrace buffer; we put in only user xstates:
1000 */
1001 memset(&header, 0, sizeof(header));
1002 header.xfeatures = xsave->header.xfeatures;
1003 header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
1004
1005 /*
1006 * Copy xregs_state->header:
1007 */
1008 offset = offsetof(struct xregs_state, header);
1009 size = sizeof(header);
1010
1011 __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
1012
1013 for (i = 0; i < XFEATURE_MAX; i++) {
1014 /*
1015 * Copy only in-use xstates:
1016 */
1017 if ((header.xfeatures >> i) & 1) {
1018 void *src = __raw_xsave_addr(xsave, 1 << i);
1019
1020 offset = xstate_offsets[i];
1021 size = xstate_sizes[i];
1022
1023 /* The next component has to fit fully into the output buffer: */
1024 if (offset + size > size_total)
1025 break;
1026
1027 __copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
943 } 1028 }
1029
1030 }
1031
1032 if (xfeatures_mxcsr_quirk(header.xfeatures)) {
1033 offset = offsetof(struct fxregs_state, mxcsr);
1034 size = MXCSR_AND_FLAGS_SIZE;
1035 __copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total);
1036 }
1037
1038 /*
1039 * Fill xsave->i387.sw_reserved value for ptrace frame:
1040 */
1041 offset = offsetof(struct fxregs_state, sw_reserved);
1042 size = sizeof(xstate_fx_sw_bytes);
1043
1044 __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
1045
1046 return 0;
1047}
1048
1049static inline int
1050__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total)
1051{
1052 if (!size)
1053 return 0;
1054
1055 if (offset < size_total) {
1056 unsigned int copy = min(size, size_total - offset);
1057
1058 if (__copy_to_user(ubuf + offset, data, copy))
1059 return -EFAULT;
944 } 1060 }
945 return 0; 1061 return 0;
946} 1062}
947 1063
948/* 1064/*
949 * Convert from kernel XSAVES compacted format to standard format and copy 1065 * Convert from kernel XSAVES compacted format to standard format and copy
950 * to a ptrace buffer. It supports partial copy but pos always starts from 1066 * to a user-space buffer. It supports partial copy but pos always starts from
951 * zero. This is called from xstateregs_get() and there we check the CPU 1067 * zero. This is called from xstateregs_get() and there we check the CPU
952 * has XSAVES. 1068 * has XSAVES.
953 */ 1069 */
954int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, 1070int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
955 void __user *ubuf, struct xregs_state *xsave)
956{ 1071{
957 unsigned int offset, size; 1072 unsigned int offset, size;
958 int ret, i; 1073 int ret, i;
@@ -961,7 +1076,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
961 /* 1076 /*
962 * Currently copy_regset_to_user() starts from pos 0: 1077 * Currently copy_regset_to_user() starts from pos 0:
963 */ 1078 */
964 if (unlikely(pos != 0)) 1079 if (unlikely(offset_start != 0))
965 return -EFAULT; 1080 return -EFAULT;
966 1081
967 /* 1082 /*
@@ -977,8 +1092,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
977 offset = offsetof(struct xregs_state, header); 1092 offset = offsetof(struct xregs_state, header);
978 size = sizeof(header); 1093 size = sizeof(header);
979 1094
980 ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count); 1095 ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total);
981
982 if (ret) 1096 if (ret)
983 return ret; 1097 return ret;
984 1098
@@ -992,25 +1106,30 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
992 offset = xstate_offsets[i]; 1106 offset = xstate_offsets[i];
993 size = xstate_sizes[i]; 1107 size = xstate_sizes[i];
994 1108
995 ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count); 1109 /* The next component has to fit fully into the output buffer: */
1110 if (offset + size > size_total)
1111 break;
996 1112
1113 ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
997 if (ret) 1114 if (ret)
998 return ret; 1115 return ret;
999
1000 if (offset + size >= count)
1001 break;
1002 } 1116 }
1003 1117
1004 } 1118 }
1005 1119
1120 if (xfeatures_mxcsr_quirk(header.xfeatures)) {
1121 offset = offsetof(struct fxregs_state, mxcsr);
1122 size = MXCSR_AND_FLAGS_SIZE;
1123 __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total);
1124 }
1125
1006 /* 1126 /*
1007 * Fill xsave->i387.sw_reserved value for ptrace frame: 1127 * Fill xsave->i387.sw_reserved value for ptrace frame:
1008 */ 1128 */
1009 offset = offsetof(struct fxregs_state, sw_reserved); 1129 offset = offsetof(struct fxregs_state, sw_reserved);
1010 size = sizeof(xstate_fx_sw_bytes); 1130 size = sizeof(xstate_fx_sw_bytes);
1011 1131
1012 ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count); 1132 ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total);
1013
1014 if (ret) 1133 if (ret)
1015 return ret; 1134 return ret;
1016 1135
@@ -1018,55 +1137,98 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
1018} 1137}
1019 1138
1020/* 1139/*
1021 * Convert from a ptrace standard-format buffer to kernel XSAVES format 1140 * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
1022 * and copy to the target thread. This is called from xstateregs_set() and 1141 * and copy to the target thread. This is called from xstateregs_set().
1023 * there we check the CPU has XSAVES and a whole standard-sized buffer
1024 * exists.
1025 */ 1142 */
1026int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, 1143int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
1027 struct xregs_state *xsave)
1028{ 1144{
1029 unsigned int offset, size; 1145 unsigned int offset, size;
1030 int i; 1146 int i;
1031 u64 xfeatures; 1147 struct xstate_header hdr;
1032 u64 allowed_features;
1033 1148
1034 offset = offsetof(struct xregs_state, header); 1149 offset = offsetof(struct xregs_state, header);
1035 size = sizeof(xfeatures); 1150 size = sizeof(hdr);
1036 1151
1037 if (kbuf) { 1152 memcpy(&hdr, kbuf + offset, size);
1038 memcpy(&xfeatures, kbuf + offset, size); 1153
1039 } else { 1154 if (validate_xstate_header(&hdr))
1040 if (__copy_from_user(&xfeatures, ubuf + offset, size)) 1155 return -EINVAL;
1041 return -EFAULT; 1156
1157 for (i = 0; i < XFEATURE_MAX; i++) {
1158 u64 mask = ((u64)1 << i);
1159
1160 if (hdr.xfeatures & mask) {
1161 void *dst = __raw_xsave_addr(xsave, 1 << i);
1162
1163 offset = xstate_offsets[i];
1164 size = xstate_sizes[i];
1165
1166 memcpy(dst, kbuf + offset, size);
1167 }
1168 }
1169
1170 if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
1171 offset = offsetof(struct fxregs_state, mxcsr);
1172 size = MXCSR_AND_FLAGS_SIZE;
1173 memcpy(&xsave->i387.mxcsr, kbuf + offset, size);
1042 } 1174 }
1043 1175
1044 /* 1176 /*
1045 * Reject if the user sets any disabled or supervisor features: 1177 * The state that came in from userspace was user-state only.
1178 * Mask all the user states out of 'xfeatures':
1179 */
1180 xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
1181
1182 /*
1183 * Add back in the features that came in from userspace:
1046 */ 1184 */
1047 allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR; 1185 xsave->header.xfeatures |= hdr.xfeatures;
1048 1186
1049 if (xfeatures & ~allowed_features) 1187 return 0;
1188}
1189
1190/*
1191 * Convert from a ptrace or sigreturn standard-format user-space buffer to
1192 * kernel XSAVES format and copy to the target thread. This is called from
1193 * xstateregs_set(), as well as potentially from the sigreturn() and
1194 * rt_sigreturn() system calls.
1195 */
1196int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
1197{
1198 unsigned int offset, size;
1199 int i;
1200 struct xstate_header hdr;
1201
1202 offset = offsetof(struct xregs_state, header);
1203 size = sizeof(hdr);
1204
1205 if (__copy_from_user(&hdr, ubuf + offset, size))
1206 return -EFAULT;
1207
1208 if (validate_xstate_header(&hdr))
1050 return -EINVAL; 1209 return -EINVAL;
1051 1210
1052 for (i = 0; i < XFEATURE_MAX; i++) { 1211 for (i = 0; i < XFEATURE_MAX; i++) {
1053 u64 mask = ((u64)1 << i); 1212 u64 mask = ((u64)1 << i);
1054 1213
1055 if (xfeatures & mask) { 1214 if (hdr.xfeatures & mask) {
1056 void *dst = __raw_xsave_addr(xsave, 1 << i); 1215 void *dst = __raw_xsave_addr(xsave, 1 << i);
1057 1216
1058 offset = xstate_offsets[i]; 1217 offset = xstate_offsets[i];
1059 size = xstate_sizes[i]; 1218 size = xstate_sizes[i];
1060 1219
1061 if (kbuf) { 1220 if (__copy_from_user(dst, ubuf + offset, size))
1062 memcpy(dst, kbuf + offset, size); 1221 return -EFAULT;
1063 } else {
1064 if (__copy_from_user(dst, ubuf + offset, size))
1065 return -EFAULT;
1066 }
1067 } 1222 }
1068 } 1223 }
1069 1224
1225 if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
1226 offset = offsetof(struct fxregs_state, mxcsr);
1227 size = MXCSR_AND_FLAGS_SIZE;
1228 if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
1229 return -EFAULT;
1230 }
1231
1070 /* 1232 /*
1071 * The state that came in from userspace was user-state only. 1233 * The state that came in from userspace was user-state only.
1072 * Mask all the user states out of 'xfeatures': 1234 * Mask all the user states out of 'xfeatures':
@@ -1076,7 +1238,7 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
1076 /* 1238 /*
1077 * Add back in the features that came in from userspace: 1239 * Add back in the features that came in from userspace:
1078 */ 1240 */
1079 xsave->header.xfeatures |= xfeatures; 1241 xsave->header.xfeatures |= hdr.xfeatures;
1080 1242
1081 return 0; 1243 return 0;
1082} 1244}
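The validate_xstate_header() added above enforces three rules on any header supplied by userspace: no unknown or supervisor feature bits, no compacted format (xcomp_bv must be zero), and no reserved bits set. A standalone sketch of the same checks against a simplified header layout (toy struct and masks, not the kernel's xregs_state or xfeatures_mask) could be:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins; the real masks are CPU- and kernel-dependent. */
#define TOY_XFEATURES_MASK_USER   0x7ULL  /* features accepted from userspace */
#define TOY_XFEATURE_MASK_SUPERV  0x8ULL  /* supervisor-only feature bits     */

struct toy_xstate_header {
	uint64_t xfeatures;
	uint64_t xcomp_bv;
	uint64_t reserved[6];   /* 48 bytes, mirroring the BUILD_BUG_ON above */
};

static int toy_validate_header(const struct toy_xstate_header *hdr)
{
	/* No unknown or supervisor features may be set. */
	if (hdr->xfeatures & (~TOY_XFEATURES_MASK_USER | TOY_XFEATURE_MASK_SUPERV))
		return -1;

	/* Userspace must use the uncompacted format. */
	if (hdr->xcomp_bv)
		return -1;

	/* No reserved bits may be set. */
	for (size_t i = 0; i < sizeof(hdr->reserved) / sizeof(hdr->reserved[0]); i++)
		if (hdr->reserved[i])
			return -1;

	return 0;
}

int main(void)
{
	struct toy_xstate_header good = { .xfeatures = 0x3 };
	struct toy_xstate_header bad  = { .xfeatures = 0x3, .xcomp_bv = 1 };

	printf("good: %d, bad: %d\n",
	       toy_validate_header(&good), toy_validate_header(&bad));
	return 0;
}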
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 1f38d9a4d9de..d4eb450144fd 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -64,7 +64,7 @@ static void call_on_stack(void *func, void *stack)
64 64
65static inline void *current_stack(void) 65static inline void *current_stack(void)
66{ 66{
67 return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); 67 return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
68} 68}
69 69
70static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) 70static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
@@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
88 88
89 /* Save the next esp at the bottom of the stack */ 89 /* Save the next esp at the bottom of the stack */
90 prev_esp = (u32 *)irqstk; 90 prev_esp = (u32 *)irqstk;
91 *prev_esp = current_stack_pointer(); 91 *prev_esp = current_stack_pointer;
92 92
93 if (unlikely(overflow)) 93 if (unlikely(overflow))
94 call_on_stack(print_stack_overflow, isp); 94 call_on_stack(print_stack_overflow, isp);
@@ -139,7 +139,7 @@ void do_softirq_own_stack(void)
139 139
140 /* Push the previous esp onto the stack */ 140 /* Push the previous esp onto the stack */
141 prev_esp = (u32 *)irqstk; 141 prev_esp = (u32 *)irqstk;
142 *prev_esp = current_stack_pointer(); 142 *prev_esp = current_stack_pointer;
143 143
144 call_on_stack(__do_softirq, isp); 144 call_on_stack(__do_softirq, isp);
145} 145}
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 4b0592ca9e47..8c1cc08f514f 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject *parent)
299 return 0; 299 return 0;
300 300
301out_clean_nodes: 301out_clean_nodes:
302 for (j = i - 1; j > 0; j--) 302 for (j = i - 1; j >= 0; j--)
303 cleanup_setup_data_node(*(kobjp + j)); 303 cleanup_setup_data_node(*(kobjp + j));
304 kfree(kobjp); 304 kfree(kobjp);
305out_setup_data_kobj: 305out_setup_data_kobj:
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index aa60a08b65b1..e675704fa6f7 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -140,7 +140,8 @@ void kvm_async_pf_task_wait(u32 token)
140 140
141 n.token = token; 141 n.token = token;
142 n.cpu = smp_processor_id(); 142 n.cpu = smp_processor_id();
143 n.halted = is_idle_task(current) || preempt_count() > 1; 143 n.halted = is_idle_task(current) || preempt_count() > 1 ||
144 rcu_preempt_depth();
144 init_swait_queue_head(&n.wq); 145 init_swait_queue_head(&n.wq);
145 hlist_add_head(&n.link, &b->list); 146 hlist_add_head(&n.link, &b->list);
146 raw_spin_unlock(&b->lock); 147 raw_spin_unlock(&b->lock);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index e04442345fc0..4e188fda5961 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -263,7 +263,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
263 sp = (unsigned long) ka->sa.sa_restorer; 263 sp = (unsigned long) ka->sa.sa_restorer;
264 } 264 }
265 265
266 if (fpu->fpstate_active) { 266 if (fpu->initialized) {
267 sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32), 267 sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
268 &buf_fx, &math_size); 268 &buf_fx, &math_size);
269 *fpstate = (void __user *)sp; 269 *fpstate = (void __user *)sp;
@@ -279,7 +279,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
279 return (void __user *)-1L; 279 return (void __user *)-1L;
280 280
281 /* save i387 and extended state */ 281 /* save i387 and extended state */
282 if (fpu->fpstate_active && 282 if (fpu->initialized &&
283 copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0) 283 copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0)
284 return (void __user *)-1L; 284 return (void __user *)-1L;
285 285
@@ -755,7 +755,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
755 /* 755 /*
756 * Ensure the signal handler starts with the new fpu state. 756 * Ensure the signal handler starts with the new fpu state.
757 */ 757 */
758 if (fpu->fpstate_active) 758 if (fpu->initialized)
759 fpu__clear(fpu); 759 fpu__clear(fpu);
760 } 760 }
761 signal_setup_done(failed, ksig, stepping); 761 signal_setup_done(failed, ksig, stepping);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 34ea3651362e..67db4f43309e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -142,7 +142,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
142 * from double_fault. 142 * from double_fault.
143 */ 143 */
144 BUG_ON((unsigned long)(current_top_of_stack() - 144 BUG_ON((unsigned long)(current_top_of_stack() -
145 current_stack_pointer()) >= THREAD_SIZE); 145 current_stack_pointer) >= THREAD_SIZE);
146 146
147 preempt_enable_no_resched(); 147 preempt_enable_no_resched();
148} 148}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6970249c09fc..a2b804e10c95 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -200,6 +200,8 @@ struct loaded_vmcs {
200 int cpu; 200 int cpu;
201 bool launched; 201 bool launched;
202 bool nmi_known_unmasked; 202 bool nmi_known_unmasked;
203 unsigned long vmcs_host_cr3; /* May not match real cr3 */
204 unsigned long vmcs_host_cr4; /* May not match real cr4 */
203 struct list_head loaded_vmcss_on_cpu_link; 205 struct list_head loaded_vmcss_on_cpu_link;
204}; 206};
205 207
@@ -600,8 +602,6 @@ struct vcpu_vmx {
600 int gs_ldt_reload_needed; 602 int gs_ldt_reload_needed;
601 int fs_reload_needed; 603 int fs_reload_needed;
602 u64 msr_host_bndcfgs; 604 u64 msr_host_bndcfgs;
603 unsigned long vmcs_host_cr3; /* May not match real cr3 */
604 unsigned long vmcs_host_cr4; /* May not match real cr4 */
605 } host_state; 605 } host_state;
606 struct { 606 struct {
607 int vm86_active; 607 int vm86_active;
@@ -2202,46 +2202,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
2202 struct pi_desc old, new; 2202 struct pi_desc old, new;
2203 unsigned int dest; 2203 unsigned int dest;
2204 2204
2205 if (!kvm_arch_has_assigned_device(vcpu->kvm) || 2205 /*
2206 !irq_remapping_cap(IRQ_POSTING_CAP) || 2206 * In case of hot-plug or hot-unplug, we may have to undo
2207 !kvm_vcpu_apicv_active(vcpu)) 2207 * vmx_vcpu_pi_put even if there is no assigned device. And we
2208 * always keep PI.NDST up to date for simplicity: it makes the
2209 * code easier, and CPU migration is not a fast path.
2210 */
2211 if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
2208 return; 2212 return;
2209 2213
2214 /*
2215 * First handle the simple case where no cmpxchg is necessary; just
2216 * allow posting non-urgent interrupts.
2217 *
2218 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
2219 * PI.NDST: pi_post_block will do it for us and the wakeup_handler
2220 * expects the VCPU to be on the blocked_vcpu_list that matches
2221 * PI.NDST.
2222 */
2223 if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
2224 vcpu->cpu == cpu) {
2225 pi_clear_sn(pi_desc);
2226 return;
2227 }
2228
2229 /* The full case. */
2210 do { 2230 do {
2211 old.control = new.control = pi_desc->control; 2231 old.control = new.control = pi_desc->control;
2212 2232
2213 /* 2233 dest = cpu_physical_id(cpu);
2214 * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
2215 * are two possible cases:
2216 * 1. After running 'pre_block', context switch
2217 * happened. For this case, 'sn' was set in
2218 * vmx_vcpu_put(), so we need to clear it here.
2219 * 2. After running 'pre_block', we were blocked,
2220 * and woken up by some other guy. For this case,
2221 * we don't need to do anything, 'pi_post_block'
2222 * will do everything for us. However, we cannot
2223 * check whether it is case #1 or case #2 here
2224 * (maybe, not needed), so we also clear sn here,
2225 * I think it is not a big deal.
2226 */
2227 if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
2228 if (vcpu->cpu != cpu) {
2229 dest = cpu_physical_id(cpu);
2230
2231 if (x2apic_enabled())
2232 new.ndst = dest;
2233 else
2234 new.ndst = (dest << 8) & 0xFF00;
2235 }
2236 2234
2237 /* set 'NV' to 'notification vector' */ 2235 if (x2apic_enabled())
2238 new.nv = POSTED_INTR_VECTOR; 2236 new.ndst = dest;
2239 } 2237 else
2238 new.ndst = (dest << 8) & 0xFF00;
2240 2239
2241 /* Allow posting non-urgent interrupts */
2242 new.sn = 0; 2240 new.sn = 0;
2243 } while (cmpxchg(&pi_desc->control, old.control, 2241 } while (cmpxchg64(&pi_desc->control, old.control,
2244 new.control) != old.control); 2242 new.control) != old.control);
2245} 2243}
2246 2244
2247static void decache_tsc_multiplier(struct vcpu_vmx *vmx) 2245static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
@@ -5178,12 +5176,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
5178 */ 5176 */
5179 cr3 = __read_cr3(); 5177 cr3 = __read_cr3();
5180 vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ 5178 vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
5181 vmx->host_state.vmcs_host_cr3 = cr3; 5179 vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
5182 5180
5183 /* Save the most likely value for this task's CR4 in the VMCS. */ 5181 /* Save the most likely value for this task's CR4 in the VMCS. */
5184 cr4 = cr4_read_shadow(); 5182 cr4 = cr4_read_shadow();
5185 vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ 5183 vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
5186 vmx->host_state.vmcs_host_cr4 = cr4; 5184 vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
5187 5185
5188 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ 5186 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
5189#ifdef CONFIG_X86_64 5187#ifdef CONFIG_X86_64
@@ -9273,15 +9271,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9273 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); 9271 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
9274 9272
9275 cr3 = __get_current_cr3_fast(); 9273 cr3 = __get_current_cr3_fast();
9276 if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { 9274 if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
9277 vmcs_writel(HOST_CR3, cr3); 9275 vmcs_writel(HOST_CR3, cr3);
9278 vmx->host_state.vmcs_host_cr3 = cr3; 9276 vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
9279 } 9277 }
9280 9278
9281 cr4 = cr4_read_shadow(); 9279 cr4 = cr4_read_shadow();
9282 if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { 9280 if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
9283 vmcs_writel(HOST_CR4, cr4); 9281 vmcs_writel(HOST_CR4, cr4);
9284 vmx->host_state.vmcs_host_cr4 = cr4; 9282 vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
9285 } 9283 }
9286 9284
9287 /* When single-stepping over STI and MOV SS, we must clear the 9285 /* When single-stepping over STI and MOV SS, we must clear the
@@ -9591,6 +9589,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
9591 9589
9592 vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; 9590 vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
9593 9591
9592 /*
9593 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
9594 * or POSTED_INTR_WAKEUP_VECTOR.
9595 */
9596 vmx->pi_desc.nv = POSTED_INTR_VECTOR;
9597 vmx->pi_desc.sn = 1;
9598
9594 return &vmx->vcpu; 9599 return &vmx->vcpu;
9595 9600
9596free_vmcs: 9601free_vmcs:
@@ -9839,7 +9844,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
9839 9844
9840 WARN_ON(!is_guest_mode(vcpu)); 9845 WARN_ON(!is_guest_mode(vcpu));
9841 9846
9842 if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) { 9847 if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
9848 !to_vmx(vcpu)->nested.nested_run_pending) {
9843 vmcs12->vm_exit_intr_error_code = fault->error_code; 9849 vmcs12->vm_exit_intr_error_code = fault->error_code;
9844 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, 9850 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
9845 PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | 9851 PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
@@ -11704,6 +11710,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
11704 kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); 11710 kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
11705} 11711}
11706 11712
11713static void __pi_post_block(struct kvm_vcpu *vcpu)
11714{
11715 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
11716 struct pi_desc old, new;
11717 unsigned int dest;
11718
11719 do {
11720 old.control = new.control = pi_desc->control;
11721 WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
11722 "Wakeup handler not enabled while the VCPU is blocked\n");
11723
11724 dest = cpu_physical_id(vcpu->cpu);
11725
11726 if (x2apic_enabled())
11727 new.ndst = dest;
11728 else
11729 new.ndst = (dest << 8) & 0xFF00;
11730
11731 /* set 'NV' to 'notification vector' */
11732 new.nv = POSTED_INTR_VECTOR;
11733 } while (cmpxchg64(&pi_desc->control, old.control,
11734 new.control) != old.control);
11735
11736 if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
11737 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
11738 list_del(&vcpu->blocked_vcpu_list);
11739 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
11740 vcpu->pre_pcpu = -1;
11741 }
11742}
11743
11707/* 11744/*
11708 * This routine does the following things for vCPU which is going 11745 * This routine does the following things for vCPU which is going
11709 * to be blocked if VT-d PI is enabled. 11746 * to be blocked if VT-d PI is enabled.
@@ -11719,7 +11756,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
11719 */ 11756 */
11720static int pi_pre_block(struct kvm_vcpu *vcpu) 11757static int pi_pre_block(struct kvm_vcpu *vcpu)
11721{ 11758{
11722 unsigned long flags;
11723 unsigned int dest; 11759 unsigned int dest;
11724 struct pi_desc old, new; 11760 struct pi_desc old, new;
11725 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); 11761 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -11729,34 +11765,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
11729 !kvm_vcpu_apicv_active(vcpu)) 11765 !kvm_vcpu_apicv_active(vcpu))
11730 return 0; 11766 return 0;
11731 11767
11732 vcpu->pre_pcpu = vcpu->cpu; 11768 WARN_ON(irqs_disabled());
11733 spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, 11769 local_irq_disable();
11734 vcpu->pre_pcpu), flags); 11770 if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
11735 list_add_tail(&vcpu->blocked_vcpu_list, 11771 vcpu->pre_pcpu = vcpu->cpu;
11736 &per_cpu(blocked_vcpu_on_cpu, 11772 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
11737 vcpu->pre_pcpu)); 11773 list_add_tail(&vcpu->blocked_vcpu_list,
11738 spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, 11774 &per_cpu(blocked_vcpu_on_cpu,
11739 vcpu->pre_pcpu), flags); 11775 vcpu->pre_pcpu));
11776 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
11777 }
11740 11778
11741 do { 11779 do {
11742 old.control = new.control = pi_desc->control; 11780 old.control = new.control = pi_desc->control;
11743 11781
11744 /*
11745 * We should not block the vCPU if
11746 * an interrupt is posted for it.
11747 */
11748 if (pi_test_on(pi_desc) == 1) {
11749 spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
11750 vcpu->pre_pcpu), flags);
11751 list_del(&vcpu->blocked_vcpu_list);
11752 spin_unlock_irqrestore(
11753 &per_cpu(blocked_vcpu_on_cpu_lock,
11754 vcpu->pre_pcpu), flags);
11755 vcpu->pre_pcpu = -1;
11756
11757 return 1;
11758 }
11759
11760 WARN((pi_desc->sn == 1), 11782 WARN((pi_desc->sn == 1),
11761 "Warning: SN field of posted-interrupts " 11783 "Warning: SN field of posted-interrupts "
11762 "is set before blocking\n"); 11784 "is set before blocking\n");
@@ -11778,10 +11800,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
11778 11800
11779 /* set 'NV' to 'wakeup vector' */ 11801 /* set 'NV' to 'wakeup vector' */
11780 new.nv = POSTED_INTR_WAKEUP_VECTOR; 11802 new.nv = POSTED_INTR_WAKEUP_VECTOR;
11781 } while (cmpxchg(&pi_desc->control, old.control, 11803 } while (cmpxchg64(&pi_desc->control, old.control,
11782 new.control) != old.control); 11804 new.control) != old.control);
11783 11805
11784 return 0; 11806 /* We should not block the vCPU if an interrupt is posted for it. */
11807 if (pi_test_on(pi_desc) == 1)
11808 __pi_post_block(vcpu);
11809
11810 local_irq_enable();
11811 return (vcpu->pre_pcpu == -1);
11785} 11812}
11786 11813
11787static int vmx_pre_block(struct kvm_vcpu *vcpu) 11814static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -11797,44 +11824,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
11797 11824
11798static void pi_post_block(struct kvm_vcpu *vcpu) 11825static void pi_post_block(struct kvm_vcpu *vcpu)
11799{ 11826{
11800 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); 11827 if (vcpu->pre_pcpu == -1)
11801 struct pi_desc old, new;
11802 unsigned int dest;
11803 unsigned long flags;
11804
11805 if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
11806 !irq_remapping_cap(IRQ_POSTING_CAP) ||
11807 !kvm_vcpu_apicv_active(vcpu))
11808 return; 11828 return;
11809 11829
11810 do { 11830 WARN_ON(irqs_disabled());
11811 old.control = new.control = pi_desc->control; 11831 local_irq_disable();
11812 11832 __pi_post_block(vcpu);
11813 dest = cpu_physical_id(vcpu->cpu); 11833 local_irq_enable();
11814
11815 if (x2apic_enabled())
11816 new.ndst = dest;
11817 else
11818 new.ndst = (dest << 8) & 0xFF00;
11819
11820 /* Allow posting non-urgent interrupts */
11821 new.sn = 0;
11822
11823 /* set 'NV' to 'notification vector' */
11824 new.nv = POSTED_INTR_VECTOR;
11825 } while (cmpxchg(&pi_desc->control, old.control,
11826 new.control) != old.control);
11827
11828 if(vcpu->pre_pcpu != -1) {
11829 spin_lock_irqsave(
11830 &per_cpu(blocked_vcpu_on_cpu_lock,
11831 vcpu->pre_pcpu), flags);
11832 list_del(&vcpu->blocked_vcpu_list);
11833 spin_unlock_irqrestore(
11834 &per_cpu(blocked_vcpu_on_cpu_lock,
11835 vcpu->pre_pcpu), flags);
11836 vcpu->pre_pcpu = -1;
11837 }
11838} 11834}
11839 11835
11840static void vmx_post_block(struct kvm_vcpu *vcpu) 11836static void vmx_post_block(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cd17b7d9a107..03869eb7fcd6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7225,7 +7225,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7225 int r; 7225 int r;
7226 sigset_t sigsaved; 7226 sigset_t sigsaved;
7227 7227
7228 fpu__activate_curr(fpu); 7228 fpu__initialize(fpu);
7229 7229
7230 if (vcpu->sigset_active) 7230 if (vcpu->sigset_active)
7231 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 7231 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index d4a7df2205b8..220638a4cb94 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -114,7 +114,7 @@ void math_emulate(struct math_emu_info *info)
114 struct desc_struct code_descriptor; 114 struct desc_struct code_descriptor;
115 struct fpu *fpu = &current->thread.fpu; 115 struct fpu *fpu = &current->thread.fpu;
116 116
117 fpu__activate_curr(fpu); 117 fpu__initialize(fpu);
118 118
119#ifdef RE_ENTRANT_CHECKING 119#ifdef RE_ENTRANT_CHECKING
120 if (emulating) { 120 if (emulating) {
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index c076f710de4c..c3521e2be396 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -2,6 +2,7 @@
2#include <linux/uaccess.h> 2#include <linux/uaccess.h>
3#include <linux/sched/debug.h> 3#include <linux/sched/debug.h>
4 4
5#include <asm/fpu/internal.h>
5#include <asm/traps.h> 6#include <asm/traps.h>
6#include <asm/kdebug.h> 7#include <asm/kdebug.h>
7 8
@@ -78,6 +79,29 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup,
78} 79}
79EXPORT_SYMBOL_GPL(ex_handler_refcount); 80EXPORT_SYMBOL_GPL(ex_handler_refcount);
80 81
82/*
83 * Handler for when we fail to restore a task's FPU state. We should never get
84 * here because the FPU state of a task using the FPU (task->thread.fpu.state)
85 * should always be valid. However, past bugs have allowed userspace to set
86 * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
87 * These caused XRSTOR to fail when switching to the task, leaking the FPU
88 * registers of the task previously executing on the CPU. Mitigate this class
89 * of vulnerability by restoring from the initial state (essentially, zeroing
90 * out all the FPU registers) if we can't restore from the task's FPU state.
91 */
92bool ex_handler_fprestore(const struct exception_table_entry *fixup,
93 struct pt_regs *regs, int trapnr)
94{
95 regs->ip = ex_fixup_addr(fixup);
96
97 WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
98 (void *)instruction_pointer(regs));
99
100 __copy_kernel_to_fpregs(&init_fpstate, -1);
101 return true;
102}
103EXPORT_SYMBOL_GPL(ex_handler_fprestore);
104
81bool ex_handler_ext(const struct exception_table_entry *fixup, 105bool ex_handler_ext(const struct exception_table_entry *fixup,
82 struct pt_regs *regs, int trapnr) 106 struct pt_regs *regs, int trapnr)
83{ 107{
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 39567b5c33da..e2baeaa053a5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -192,8 +192,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
192 * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really 192 * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
193 * faulted on a pte with its pkey=4. 193 * faulted on a pte with its pkey=4.
194 */ 194 */
195static void fill_sig_info_pkey(int si_code, siginfo_t *info, 195static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
196 struct vm_area_struct *vma)
197{ 196{
198 /* This is effectively an #ifdef */ 197 /* This is effectively an #ifdef */
199 if (!boot_cpu_has(X86_FEATURE_OSPKE)) 198 if (!boot_cpu_has(X86_FEATURE_OSPKE))
@@ -209,7 +208,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
209 * valid VMA, so we should never reach this without a 208 * valid VMA, so we should never reach this without a
210 * valid VMA. 209 * valid VMA.
211 */ 210 */
212 if (!vma) { 211 if (!pkey) {
213 WARN_ONCE(1, "PKU fault with no VMA passed in"); 212 WARN_ONCE(1, "PKU fault with no VMA passed in");
214 info->si_pkey = 0; 213 info->si_pkey = 0;
215 return; 214 return;
@@ -219,13 +218,12 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
 219 * absolutely guaranteed to be 100% accurate because of 218 * the race explained above.
220 * the race explained above. 219 * the race explained above.
221 */ 220 */
222 info->si_pkey = vma_pkey(vma); 221 info->si_pkey = *pkey;
223} 222}
224 223
225static void 224static void
226force_sig_info_fault(int si_signo, int si_code, unsigned long address, 225force_sig_info_fault(int si_signo, int si_code, unsigned long address,
227 struct task_struct *tsk, struct vm_area_struct *vma, 226 struct task_struct *tsk, u32 *pkey, int fault)
228 int fault)
229{ 227{
230 unsigned lsb = 0; 228 unsigned lsb = 0;
231 siginfo_t info; 229 siginfo_t info;
@@ -240,7 +238,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
240 lsb = PAGE_SHIFT; 238 lsb = PAGE_SHIFT;
241 info.si_addr_lsb = lsb; 239 info.si_addr_lsb = lsb;
242 240
243 fill_sig_info_pkey(si_code, &info, vma); 241 fill_sig_info_pkey(si_code, &info, pkey);
244 242
245 force_sig_info(si_signo, &info, tsk); 243 force_sig_info(si_signo, &info, tsk);
246} 244}
@@ -762,8 +760,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
762 struct task_struct *tsk = current; 760 struct task_struct *tsk = current;
763 unsigned long flags; 761 unsigned long flags;
764 int sig; 762 int sig;
765 /* No context means no VMA to pass down */
766 struct vm_area_struct *vma = NULL;
767 763
768 /* Are we prepared to handle this kernel fault? */ 764 /* Are we prepared to handle this kernel fault? */
769 if (fixup_exception(regs, X86_TRAP_PF)) { 765 if (fixup_exception(regs, X86_TRAP_PF)) {
@@ -788,7 +784,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
788 784
789 /* XXX: hwpoison faults will set the wrong code. */ 785 /* XXX: hwpoison faults will set the wrong code. */
790 force_sig_info_fault(signal, si_code, address, 786 force_sig_info_fault(signal, si_code, address,
791 tsk, vma, 0); 787 tsk, NULL, 0);
792 } 788 }
793 789
794 /* 790 /*
@@ -896,8 +892,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
896 892
897static void 893static void
898__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, 894__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
899 unsigned long address, struct vm_area_struct *vma, 895 unsigned long address, u32 *pkey, int si_code)
900 int si_code)
901{ 896{
902 struct task_struct *tsk = current; 897 struct task_struct *tsk = current;
903 898
@@ -945,7 +940,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
945 tsk->thread.error_code = error_code; 940 tsk->thread.error_code = error_code;
946 tsk->thread.trap_nr = X86_TRAP_PF; 941 tsk->thread.trap_nr = X86_TRAP_PF;
947 942
948 force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0); 943 force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0);
949 944
950 return; 945 return;
951 } 946 }
@@ -958,9 +953,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
958 953
959static noinline void 954static noinline void
960bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, 955bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
961 unsigned long address, struct vm_area_struct *vma) 956 unsigned long address, u32 *pkey)
962{ 957{
963 __bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR); 958 __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR);
964} 959}
965 960
966static void 961static void
@@ -968,6 +963,10 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
968 unsigned long address, struct vm_area_struct *vma, int si_code) 963 unsigned long address, struct vm_area_struct *vma, int si_code)
969{ 964{
970 struct mm_struct *mm = current->mm; 965 struct mm_struct *mm = current->mm;
966 u32 pkey;
967
968 if (vma)
969 pkey = vma_pkey(vma);
971 970
972 /* 971 /*
973 * Something tried to access memory that isn't in our memory map.. 972 * Something tried to access memory that isn't in our memory map..
@@ -975,7 +974,8 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
975 */ 974 */
976 up_read(&mm->mmap_sem); 975 up_read(&mm->mmap_sem);
977 976
978 __bad_area_nosemaphore(regs, error_code, address, vma, si_code); 977 __bad_area_nosemaphore(regs, error_code, address,
978 (vma) ? &pkey : NULL, si_code);
979} 979}
980 980
981static noinline void 981static noinline void
@@ -1018,7 +1018,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
1018 1018
1019static void 1019static void
1020do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, 1020do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
1021 struct vm_area_struct *vma, unsigned int fault) 1021 u32 *pkey, unsigned int fault)
1022{ 1022{
1023 struct task_struct *tsk = current; 1023 struct task_struct *tsk = current;
1024 int code = BUS_ADRERR; 1024 int code = BUS_ADRERR;
@@ -1045,13 +1045,12 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
1045 code = BUS_MCEERR_AR; 1045 code = BUS_MCEERR_AR;
1046 } 1046 }
1047#endif 1047#endif
1048 force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault); 1048 force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault);
1049} 1049}
1050 1050
1051static noinline void 1051static noinline void
1052mm_fault_error(struct pt_regs *regs, unsigned long error_code, 1052mm_fault_error(struct pt_regs *regs, unsigned long error_code,
1053 unsigned long address, struct vm_area_struct *vma, 1053 unsigned long address, u32 *pkey, unsigned int fault)
1054 unsigned int fault)
1055{ 1054{
1056 if (fatal_signal_pending(current) && !(error_code & PF_USER)) { 1055 if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
1057 no_context(regs, error_code, address, 0, 0); 1056 no_context(regs, error_code, address, 0, 0);
@@ -1075,9 +1074,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
1075 } else { 1074 } else {
1076 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| 1075 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
1077 VM_FAULT_HWPOISON_LARGE)) 1076 VM_FAULT_HWPOISON_LARGE))
1078 do_sigbus(regs, error_code, address, vma, fault); 1077 do_sigbus(regs, error_code, address, pkey, fault);
1079 else if (fault & VM_FAULT_SIGSEGV) 1078 else if (fault & VM_FAULT_SIGSEGV)
1080 bad_area_nosemaphore(regs, error_code, address, vma); 1079 bad_area_nosemaphore(regs, error_code, address, pkey);
1081 else 1080 else
1082 BUG(); 1081 BUG();
1083 } 1082 }
@@ -1267,6 +1266,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1267 struct mm_struct *mm; 1266 struct mm_struct *mm;
1268 int fault, major = 0; 1267 int fault, major = 0;
1269 unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; 1268 unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
1269 u32 pkey;
1270 1270
1271 tsk = current; 1271 tsk = current;
1272 mm = tsk->mm; 1272 mm = tsk->mm;
@@ -1467,9 +1467,10 @@ good_area:
1467 return; 1467 return;
1468 } 1468 }
1469 1469
1470 pkey = vma_pkey(vma);
1470 up_read(&mm->mmap_sem); 1471 up_read(&mm->mmap_sem);
1471 if (unlikely(fault & VM_FAULT_ERROR)) { 1472 if (unlikely(fault & VM_FAULT_ERROR)) {
1472 mm_fault_error(regs, error_code, address, vma, fault); 1473 mm_fault_error(regs, error_code, address, &pkey, fault);
1473 return; 1474 return;
1474 } 1475 }
1475 1476
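The fault.c hunks above stop passing the vm_area_struct down to the signal helpers and instead snapshot a u32 pkey with vma_pkey() while mmap_sem is still held; once up_read() runs the vma can be unmapped or recycled, so only the copied value is safe to use later. A minimal user-space sketch of the same capture-before-unlock shape, with a pthread rwlock standing in for mmap_sem (all names here are illustrative, not kernel API):

    #include <pthread.h>
    #include <stdio.h>

    struct region { int pkey; };

    static pthread_rwlock_t map_lock = PTHREAD_RWLOCK_INITIALIZER;

    static struct region *lookup_region(void)
    {
        static struct region r = { .pkey = 3 };
        return &r;                          /* pretend we found a mapping */
    }

    static void report_fault(const int *pkey)
    {
        if (pkey)
            printf("fault in region with pkey %d\n", *pkey);
        else
            printf("fault outside any known region\n");
    }

    int main(void)
    {
        int pkey = 0, have_region = 0;
        struct region *r;

        pthread_rwlock_rdlock(&map_lock);
        r = lookup_region();
        if (r) {
            pkey = r->pkey;                 /* copy while the lock still pins the region */
            have_region = 1;
        }
        pthread_rwlock_unlock(&map_lock);   /* r must not be dereferenced past this point */

        report_fault(have_region ? &pkey : NULL);
        return 0;
    }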
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 3fcc8e01683b..16c5f37933a2 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -10,6 +10,8 @@
10 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
11 */ 11 */
12 12
13#define DISABLE_BRANCH_PROFILING
14
13#include <linux/linkage.h> 15#include <linux/linkage.h>
14#include <linux/init.h> 16#include <linux/init.h>
15#include <linux/mm.h> 17#include <linux/mm.h>
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 2dab69a706ec..d7bc0eea20a5 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -18,7 +18,6 @@
18 18
19#include <asm/cpufeature.h> /* boot_cpu_has, ... */ 19#include <asm/cpufeature.h> /* boot_cpu_has, ... */
20#include <asm/mmu_context.h> /* vma_pkey() */ 20#include <asm/mmu_context.h> /* vma_pkey() */
21#include <asm/fpu/internal.h> /* fpregs_active() */
22 21
23int __execute_only_pkey(struct mm_struct *mm) 22int __execute_only_pkey(struct mm_struct *mm)
24{ 23{
@@ -45,7 +44,7 @@ int __execute_only_pkey(struct mm_struct *mm)
45 */ 44 */
46 preempt_disable(); 45 preempt_disable();
47 if (!need_to_set_mm_pkey && 46 if (!need_to_set_mm_pkey &&
48 fpregs_active() && 47 current->thread.fpu.initialized &&
49 !__pkru_allows_read(read_pkru(), execute_only_pkey)) { 48 !__pkru_allows_read(read_pkru(), execute_only_pkey)) {
50 preempt_enable(); 49 preempt_enable();
51 return execute_only_pkey; 50 return execute_only_pkey;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 93fe97cce581..49d9778376d7 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -191,7 +191,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
191 * mapped in the new pgd, we'll double-fault. Forcibly 191 * mapped in the new pgd, we'll double-fault. Forcibly
192 * map it. 192 * map it.
193 */ 193 */
194 unsigned int index = pgd_index(current_stack_pointer()); 194 unsigned int index = pgd_index(current_stack_pointer);
195 pgd_t *pgd = next->pgd + index; 195 pgd_t *pgd = next->pgd + index;
196 196
197 if (unlikely(pgd_none(*pgd))) 197 if (unlikely(pgd_none(*pgd)))
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 7330cb3b2283..71495f1a86d7 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1238,21 +1238,16 @@ static void __init xen_pagetable_cleanhighmap(void)
1238 * from _brk_limit way up to the max_pfn_mapped (which is the end of 1238 * from _brk_limit way up to the max_pfn_mapped (which is the end of
1239 * the ramdisk). We continue on, erasing PMD entries that point to page 1239 * the ramdisk). We continue on, erasing PMD entries that point to page
1240 * tables - do note that they are accessible at this stage via __va. 1240 * tables - do note that they are accessible at this stage via __va.
1241 * For good measure we also round up to the PMD - which means that if 1241 * As Xen is aligning the memory end to a 4MB boundary, for good
1242 * measure we also round up to PMD_SIZE * 2 - which means that if
1242 * anybody is using __ka address to the initial boot-stack - and try 1243 * anybody is using __ka address to the initial boot-stack - and try
1243 * to use it - they are going to crash. The xen_start_info has been 1244 * to use it - they are going to crash. The xen_start_info has been
1244 * taken care of already in xen_setup_kernel_pagetable. */ 1245 * taken care of already in xen_setup_kernel_pagetable. */
1245 addr = xen_start_info->pt_base; 1246 addr = xen_start_info->pt_base;
1246 size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE); 1247 size = xen_start_info->nr_pt_frames * PAGE_SIZE;
1247 1248
1248 xen_cleanhighmap(addr, addr + size); 1249 xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2));
1249 xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); 1250 xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
1250#ifdef DEBUG
1251 /* This is superfluous and is not necessary, but you know what
1252 * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
1253 * anything at this stage. */
1254 xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
1255#endif
1256} 1251}
1257#endif 1252#endif
1258 1253
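In the xen_pagetable_cleanhighmap() hunk above the size is no longer rounded; instead the end address handed to xen_cleanhighmap() is rounded up to PMD_SIZE * 2 (4 MiB with 2 MiB PMDs), matching the 4 MiB alignment Xen applies to the memory end. A quick stand-alone check of that roundup arithmetic, assuming x86-64 constants (the addresses are made up for illustration):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PMD_SIZE  (2UL * 1024 * 1024)
    #define ROUNDUP(x, a) ((((x) + (a) - 1) / (a)) * (a))

    int main(void)
    {
        unsigned long addr = 0xffffffff82400000UL;  /* hypothetical pt_base */
        unsigned long nr_pt_frames = 13;
        unsigned long size = nr_pt_frames * PAGE_SIZE;
        unsigned long end = ROUNDUP(addr + size, PMD_SIZE * 2);

        printf("clean %#lx - %#lx (%lu KiB of tables, end padded to a 4 MiB boundary)\n",
               addr, end, size / 1024);
        return 0;
    }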
diff --git a/block/blk-core.c b/block/blk-core.c
index aebe676225e6..048be4aa6024 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -854,6 +854,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
854 854
855 kobject_init(&q->kobj, &blk_queue_ktype); 855 kobject_init(&q->kobj, &blk_queue_ktype);
856 856
857#ifdef CONFIG_BLK_DEV_IO_TRACE
858 mutex_init(&q->blk_trace_mutex);
859#endif
857 mutex_init(&q->sysfs_lock); 860 mutex_init(&q->sysfs_lock);
858 spin_lock_init(&q->__queue_lock); 861 spin_lock_init(&q->__queue_lock);
859 862
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index c82408c7cc3c..dbddff8174e5 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -154,7 +154,6 @@ static int bsg_prepare_job(struct device *dev, struct request *req)
154failjob_rls_rqst_payload: 154failjob_rls_rqst_payload:
155 kfree(job->request_payload.sg_list); 155 kfree(job->request_payload.sg_list);
156failjob_rls_job: 156failjob_rls_job:
157 kfree(job);
158 return -ENOMEM; 157 return -ENOMEM;
159} 158}
160 159
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 86e8fe1adcdb..88c555db4e5d 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -112,7 +112,7 @@ ssize_t part_stat_show(struct device *dev,
112 struct device_attribute *attr, char *buf) 112 struct device_attribute *attr, char *buf)
113{ 113{
114 struct hd_struct *p = dev_to_part(dev); 114 struct hd_struct *p = dev_to_part(dev);
115 struct request_queue *q = dev_to_disk(dev)->queue; 115 struct request_queue *q = part_to_disk(p)->queue;
116 unsigned int inflight[2]; 116 unsigned int inflight[2];
117 int cpu; 117 int cpu;
118 118
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 077f9bad6f44..3c3a37b8503b 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -743,17 +743,19 @@ static int ghes_proc(struct ghes *ghes)
743 } 743 }
744 ghes_do_proc(ghes, ghes->estatus); 744 ghes_do_proc(ghes, ghes->estatus);
745 745
746out:
747 ghes_clear_estatus(ghes);
748
749 if (rc == -ENOENT)
750 return rc;
751
746 /* 752 /*
747 * GHESv2 type HEST entries introduce support for error acknowledgment, 753 * GHESv2 type HEST entries introduce support for error acknowledgment,
748 * so only acknowledge the error if this support is present. 754 * so only acknowledge the error if this support is present.
749 */ 755 */
750 if (is_hest_type_generic_v2(ghes)) { 756 if (is_hest_type_generic_v2(ghes))
751 rc = ghes_ack_error(ghes->generic_v2); 757 return ghes_ack_error(ghes->generic_v2);
752 if (rc) 758
753 return rc;
754 }
755out:
756 ghes_clear_estatus(ghes);
757 return rc; 759 return rc;
758} 760}
759 761
diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index a8cc14fd8ae4..a6de32530693 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -1581,6 +1581,9 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
1581 1581
1582 opp->available = availability_req; 1582 opp->available = availability_req;
1583 1583
1584 dev_pm_opp_get(opp);
1585 mutex_unlock(&opp_table->lock);
1586
1584 /* Notify the change of the OPP availability */ 1587 /* Notify the change of the OPP availability */
1585 if (availability_req) 1588 if (availability_req)
1586 blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ENABLE, 1589 blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ENABLE,
@@ -1589,8 +1592,12 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
1589 blocking_notifier_call_chain(&opp_table->head, 1592 blocking_notifier_call_chain(&opp_table->head,
1590 OPP_EVENT_DISABLE, opp); 1593 OPP_EVENT_DISABLE, opp);
1591 1594
1595 dev_pm_opp_put(opp);
1596 goto put_table;
1597
1592unlock: 1598unlock:
1593 mutex_unlock(&opp_table->lock); 1599 mutex_unlock(&opp_table->lock);
1600put_table:
1594 dev_pm_opp_put_opp_table(opp_table); 1601 dev_pm_opp_put_opp_table(opp_table);
1595 return r; 1602 return r;
1596} 1603}
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index bbd0d186cfc0..2d7178f7754e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -342,7 +342,7 @@ static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff,
342 342
343 if (!brd) 343 if (!brd)
344 return -ENODEV; 344 return -ENODEV;
345 page = brd_insert_page(brd, PFN_PHYS(pgoff) / 512); 345 page = brd_insert_page(brd, (sector_t)pgoff << PAGE_SECTORS_SHIFT);
346 if (!page) 346 if (!page)
347 return -ENOSPC; 347 return -ENOSPC;
348 *kaddr = page_address(page); 348 *kaddr = page_address(page);
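Both sides of the brd.c change compute the starting sector of the page at index pgoff: a page holds PAGE_SIZE / 512 sectors, so sector = pgoff << (PAGE_SHIFT - 9). The new form widens to sector_t before shifting instead of going through a physical address first, which keeps the intermediate value from being truncated on 32-bit configurations. A stand-alone check of the arithmetic, assuming PAGE_SHIFT is 12:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT         12
    #define SECTOR_SHIFT        9
    #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)   /* 8 sectors per 4 KiB page */

    int main(void)
    {
        uint64_t pgoff  = 0x1200000;                 /* page index ~72 GiB into the device */
        uint64_t sector = pgoff << PAGE_SECTORS_SHIFT;

        printf("page %llu -> sector %llu (byte offset %llu)\n",
               (unsigned long long)pgoff,
               (unsigned long long)sector,
               (unsigned long long)(sector << SECTOR_SHIFT));
        return 0;
    }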
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index f68c1d50802f..1f3956702993 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -67,10 +67,8 @@ struct loop_device {
67struct loop_cmd { 67struct loop_cmd {
68 struct kthread_work work; 68 struct kthread_work work;
69 struct request *rq; 69 struct request *rq;
70 union { 70 bool use_aio; /* use AIO interface to handle I/O */
71 bool use_aio; /* use AIO interface to handle I/O */ 71 atomic_t ref; /* only for aio */
72 atomic_t ref; /* only for aio */
73 };
74 long ret; 72 long ret;
75 struct kiocb iocb; 73 struct kiocb iocb;
76 struct bio_vec *bvec; 74 struct bio_vec *bvec;
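The loop.h hunk above drops a union that overlaid use_aio and ref: for AIO commands both fields are live at the same time, so taking a reference scribbled over the flag. A tiny stand-alone illustration of the aliasing (only the two field names come from the driver; everything else is made up):

    #include <stdbool.h>
    #include <stdio.h>

    struct cmd_buggy {
        union {
            bool use_aio;   /* use AIO interface to handle I/O */
            int  ref;       /* only for aio */
        };
    };

    struct cmd_fixed {
        bool use_aio;
        int  ref;
    };

    int main(void)
    {
        struct cmd_buggy b = { .use_aio = true };
        b.ref = 0;              /* "take a reference": shares storage with use_aio */
        printf("union layout:    use_aio now reads as %d\n", b.use_aio);

        struct cmd_fixed f = { .use_aio = true, .ref = 0 };
        f.ref++;
        printf("separate fields: use_aio still %d, ref %d\n", f.use_aio, f.ref);
        return 0;
    }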
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 2aa87cbdede0..3684e21d543f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1194,6 +1194,12 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
1194 if (!capable(CAP_SYS_ADMIN)) 1194 if (!capable(CAP_SYS_ADMIN))
1195 return -EPERM; 1195 return -EPERM;
1196 1196
1197 /* The block layer will pass back some non-nbd ioctls in case we have
1198 * special handling for them, but we don't so just return an error.
1199 */
1200 if (_IOC_TYPE(cmd) != 0xab)
1201 return -EINVAL;
1202
1197 mutex_lock(&nbd->config_lock); 1203 mutex_lock(&nbd->config_lock);
1198 1204
1199 /* Don't allow ioctl operations on a nbd device that was created with 1205 /* Don't allow ioctl operations on a nbd device that was created with
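The added nbd check bails out early unless the ioctl's type byte is 0xab, the magic nbd's own commands are built from in include/uapi/linux/nbd.h; anything the block layer forwards with another magic gets -EINVAL instead of falling through. The type byte sits in bits 8-15 of the command word, which is all _IOC_TYPE() extracts. A small user-space demonstration (NBD_DISCONNECT's encoding mirrors the uapi header; the 0x12 command is just a contrasting magic):

    #include <stdio.h>
    #include <linux/ioctl.h>   /* _IO(), _IOC_TYPE() */

    #define NBD_MAGIC      0xab
    #define NBD_DISCONNECT _IO(NBD_MAGIC, 8)   /* mirrors linux/nbd.h */
    #define SOME_BLK_IOCTL _IO(0x12, 93)       /* a non-nbd magic for contrast */

    int main(void)
    {
        unsigned int cmds[] = { NBD_DISCONNECT, SOME_BLK_IOCTL };

        for (unsigned int i = 0; i < sizeof(cmds) / sizeof(cmds[0]); i++)
            printf("cmd %#x: type %#x -> %s\n", cmds[i], _IOC_TYPE(cmds[i]),
                   _IOC_TYPE(cmds[i]) == NBD_MAGIC ? "handled" : "-EINVAL");
        return 0;
    }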
diff --git a/drivers/clocksource/numachip.c b/drivers/clocksource/numachip.c
index 6a20dc8b253f..9a7d7f0f23fe 100644
--- a/drivers/clocksource/numachip.c
+++ b/drivers/clocksource/numachip.c
@@ -43,7 +43,7 @@ static int numachip2_set_next_event(unsigned long delta, struct clock_event_devi
43 return 0; 43 return 0;
44} 44}
45 45
46static struct clock_event_device numachip2_clockevent = { 46static const struct clock_event_device numachip2_clockevent __initconst = {
47 .name = "numachip2", 47 .name = "numachip2",
48 .rating = 400, 48 .rating = 400,
49 .set_next_event = numachip2_set_next_event, 49 .set_next_event = numachip2_set_next_event,
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 430edadca527..a753c50e9e41 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -118,6 +118,10 @@ static const struct of_device_id blacklist[] __initconst = {
118 118
119 { .compatible = "sigma,tango4", }, 119 { .compatible = "sigma,tango4", },
120 120
121 { .compatible = "ti,am33xx", },
122 { .compatible = "ti,am43", },
123 { .compatible = "ti,dra7", },
124
121 { } 125 { }
122}; 126};
123 127
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index d228f5a99044..dbbe986f90f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -636,7 +636,194 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
636 NUM_BANKS(ADDR_SURF_2_BANK); 636 NUM_BANKS(ADDR_SURF_2_BANK);
637 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 637 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
638 WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); 638 WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
639 } else if (adev->asic_type == CHIP_OLAND || adev->asic_type == CHIP_HAINAN) { 639 } else if (adev->asic_type == CHIP_OLAND) {
640 tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
641 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
642 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
643 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
644 NUM_BANKS(ADDR_SURF_16_BANK) |
645 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
646 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
647 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
648 tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
649 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
650 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
651 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
652 NUM_BANKS(ADDR_SURF_16_BANK) |
653 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
654 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
655 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
656 tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
657 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
658 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
659 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
660 NUM_BANKS(ADDR_SURF_16_BANK) |
661 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
662 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
663 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
664 tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
665 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
666 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
667 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
668 NUM_BANKS(ADDR_SURF_16_BANK) |
669 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
670 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
671 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
672 tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
673 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
674 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
675 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
676 NUM_BANKS(ADDR_SURF_16_BANK) |
677 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
678 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
679 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
680 tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
681 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
682 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
683 TILE_SPLIT(split_equal_to_row_size) |
684 NUM_BANKS(ADDR_SURF_16_BANK) |
685 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
686 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
687 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
688 tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
689 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
690 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
691 TILE_SPLIT(split_equal_to_row_size) |
692 NUM_BANKS(ADDR_SURF_16_BANK) |
693 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
694 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
695 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
696 tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
697 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
698 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
699 TILE_SPLIT(split_equal_to_row_size) |
700 NUM_BANKS(ADDR_SURF_16_BANK) |
701 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
702 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
703 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
704 tilemode[8] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
705 ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
706 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
707 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
708 NUM_BANKS(ADDR_SURF_16_BANK) |
709 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
710 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
711 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
712 tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
713 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
714 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
715 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
716 NUM_BANKS(ADDR_SURF_16_BANK) |
717 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
718 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
719 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
720 tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
721 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
722 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
723 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
724 NUM_BANKS(ADDR_SURF_16_BANK) |
725 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
726 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
727 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
728 tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
729 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
730 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
731 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
732 NUM_BANKS(ADDR_SURF_16_BANK) |
733 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
734 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
735 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
736 tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
737 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
738 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
739 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
740 NUM_BANKS(ADDR_SURF_16_BANK) |
741 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
742 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
743 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
744 tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
745 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
746 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
747 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
748 NUM_BANKS(ADDR_SURF_16_BANK) |
749 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
750 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
751 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
752 tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
753 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
754 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
755 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
756 NUM_BANKS(ADDR_SURF_16_BANK) |
757 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
758 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
759 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
760 tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
761 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
762 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
763 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
764 NUM_BANKS(ADDR_SURF_16_BANK) |
765 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
766 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
767 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
768 tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
769 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
770 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
771 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
772 NUM_BANKS(ADDR_SURF_16_BANK) |
773 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
774 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
775 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
776 tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
777 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
778 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
779 TILE_SPLIT(split_equal_to_row_size) |
780 NUM_BANKS(ADDR_SURF_16_BANK) |
781 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
782 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
783 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
784 tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
785 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
786 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
787 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
788 NUM_BANKS(ADDR_SURF_16_BANK) |
789 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
790 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
791 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
792 tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
793 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
794 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
795 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
796 NUM_BANKS(ADDR_SURF_16_BANK) |
797 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
798 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
799 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
800 tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
801 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
802 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
803 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
804 NUM_BANKS(ADDR_SURF_16_BANK) |
805 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
806 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
807 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
808 tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
809 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
810 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
811 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
812 NUM_BANKS(ADDR_SURF_16_BANK) |
813 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
814 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
815 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
816 tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
817 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
818 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
819 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
820 NUM_BANKS(ADDR_SURF_8_BANK) |
821 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
822 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
823 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1);
824 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
825 WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
826 } else if (adev->asic_type == CHIP_HAINAN) {
640 tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 827 tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
641 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 828 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
642 PIPE_CONFIG(ADDR_SURF_P2) | 829 PIPE_CONFIG(ADDR_SURF_P2) |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e4a8c2e52cb2..660b3fbade41 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -892,6 +892,8 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
892 int err = 0; 892 int err = 0;
893 893
894 dev = kfd_device_by_id(args->gpu_id); 894 dev = kfd_device_by_id(args->gpu_id);
895 if (!dev)
896 return -EINVAL;
895 897
896 dev->kfd2kgd->get_tile_config(dev->kgd, &config); 898 dev->kfd2kgd->get_tile_config(dev->kgd, &config);
897 899
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 5979158c3f7b..944abfad39c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -292,7 +292,10 @@ static int create_signal_event(struct file *devkfd,
292 struct kfd_event *ev) 292 struct kfd_event *ev)
293{ 293{
294 if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { 294 if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
295 pr_warn("Signal event wasn't created because limit was reached\n"); 295 if (!p->signal_event_limit_reached) {
296 pr_warn("Signal event wasn't created because limit was reached\n");
297 p->signal_event_limit_reached = true;
298 }
296 return -ENOMEM; 299 return -ENOMEM;
297 } 300 }
298 301
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 0649dd43e780..ed71ad40e8f7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -184,7 +184,7 @@ static void uninitialize(struct kernel_queue *kq)
184 if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) 184 if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
185 kq->mqd->destroy_mqd(kq->mqd, 185 kq->mqd->destroy_mqd(kq->mqd,
186 kq->queue->mqd, 186 kq->queue->mqd,
187 false, 187 KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
188 QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, 188 QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
189 kq->queue->pipe, 189 kq->queue->pipe,
190 kq->queue->queue); 190 kq->queue->queue);
@@ -210,6 +210,11 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
210 uint32_t wptr, rptr; 210 uint32_t wptr, rptr;
211 unsigned int *queue_address; 211 unsigned int *queue_address;
212 212
213 /* When rptr == wptr, the buffer is empty.
214 * When rptr == wptr + 1, the buffer is full.
215 * It is always rptr that advances to the position of wptr, rather than
216 * the opposite. So we can only use up to queue_size_dwords - 1 dwords.
217 */
213 rptr = *kq->rptr_kernel; 218 rptr = *kq->rptr_kernel;
214 wptr = *kq->wptr_kernel; 219 wptr = *kq->wptr_kernel;
215 queue_address = (unsigned int *)kq->pq_kernel_addr; 220 queue_address = (unsigned int *)kq->pq_kernel_addr;
@@ -219,11 +224,10 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
219 pr_debug("wptr: %d\n", wptr); 224 pr_debug("wptr: %d\n", wptr);
220 pr_debug("queue_address 0x%p\n", queue_address); 225 pr_debug("queue_address 0x%p\n", queue_address);
221 226
222 available_size = (rptr - 1 - wptr + queue_size_dwords) % 227 available_size = (rptr + queue_size_dwords - 1 - wptr) %
223 queue_size_dwords; 228 queue_size_dwords;
224 229
225 if (packet_size_in_dwords >= queue_size_dwords || 230 if (packet_size_in_dwords > available_size) {
226 packet_size_in_dwords >= available_size) {
227 /* 231 /*
228 * make sure calling functions know 232 * make sure calling functions know
229 * acquire_packet_buffer() failed 233 * acquire_packet_buffer() failed
@@ -233,6 +237,14 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
233 } 237 }
234 238
235 if (wptr + packet_size_in_dwords >= queue_size_dwords) { 239 if (wptr + packet_size_in_dwords >= queue_size_dwords) {
240 /* make sure after rolling back to position 0, there is
241 * still enough space.
242 */
243 if (packet_size_in_dwords >= rptr) {
244 *buffer_ptr = NULL;
245 return -ENOMEM;
246 }
247 /* fill nops, roll back and start at position 0 */
236 while (wptr > 0) { 248 while (wptr > 0) {
237 queue_address[wptr] = kq->nop_packet; 249 queue_address[wptr] = kq->nop_packet;
238 wptr = (wptr + 1) % queue_size_dwords; 250 wptr = (wptr + 1) % queue_size_dwords;
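The acquire_packet_buffer() changes above keep one slot permanently free so rptr == wptr means empty, compute the writable space as (rptr + queue_size_dwords - 1 - wptr) % queue_size_dwords, and additionally require packet_size_in_dwords < rptr when the packet would cross the end of the buffer, since the tail is filled with nops and the write restarts at position 0. A small stand-alone model of that accounting (queue size and pointer values are made up):

    #include <stdio.h>

    /* one slot is kept free, so capacity is queue size minus one dword */
    static unsigned int avail(unsigned int rptr, unsigned int wptr, unsigned int size)
    {
        return (rptr + size - 1 - wptr) % size;
    }

    static int can_write(unsigned int rptr, unsigned int wptr, unsigned int size,
                         unsigned int pkt)
    {
        if (pkt > avail(rptr, wptr, size))
            return 0;                   /* not enough space in total */
        if (wptr + pkt >= size && pkt >= rptr)
            return 0;                   /* wraps, but no room after rolling back to 0 */
        return 1;
    }

    int main(void)
    {
        unsigned int size = 16;

        printf("empty queue:    avail=%u\n", avail(0, 0, size));     /* 15 */
        printf("full queue:     avail=%u\n", avail(5, 4, size));     /* 0  */
        printf("wrap, fits:     %d\n", can_write(6, 14, size, 4));   /* 1  */
        printf("wrap, no room:  %d\n", can_write(3, 14, size, 4));   /* 0  */
        return 0;
    }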
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 4cb90f517906..634083e340d1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -519,6 +519,7 @@ struct kfd_process {
519 struct list_head signal_event_pages; 519 struct list_head signal_event_pages;
520 u32 next_nonsignal_event_id; 520 u32 next_nonsignal_event_id;
521 size_t signal_event_count; 521 size_t signal_event_count;
522 bool signal_event_limit_reached;
522}; 523};
523 524
524/** 525/**
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 5a634594a6ce..57881167ccd2 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -551,12 +551,15 @@ static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = {
551void etnaviv_gem_free_object(struct drm_gem_object *obj) 551void etnaviv_gem_free_object(struct drm_gem_object *obj)
552{ 552{
553 struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); 553 struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
554 struct etnaviv_drm_private *priv = obj->dev->dev_private;
554 struct etnaviv_vram_mapping *mapping, *tmp; 555 struct etnaviv_vram_mapping *mapping, *tmp;
555 556
556 /* object should not be active */ 557 /* object should not be active */
557 WARN_ON(is_active(etnaviv_obj)); 558 WARN_ON(is_active(etnaviv_obj));
558 559
560 mutex_lock(&priv->gem_lock);
559 list_del(&etnaviv_obj->gem_node); 561 list_del(&etnaviv_obj->gem_node);
562 mutex_unlock(&priv->gem_lock);
560 563
561 list_for_each_entry_safe(mapping, tmp, &etnaviv_obj->vram_list, 564 list_for_each_entry_safe(mapping, tmp, &etnaviv_obj->vram_list,
562 obj_node) { 565 obj_node) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 026ef4e02f85..46dfe0737f43 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -445,8 +445,10 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
445 cmdbuf->user_size = ALIGN(args->stream_size, 8); 445 cmdbuf->user_size = ALIGN(args->stream_size, 8);
446 446
447 ret = etnaviv_gpu_submit(gpu, submit, cmdbuf); 447 ret = etnaviv_gpu_submit(gpu, submit, cmdbuf);
448 if (ret == 0) 448 if (ret)
449 cmdbuf = NULL; 449 goto out;
450
451 cmdbuf = NULL;
450 452
451 if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) { 453 if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) {
452 /* 454 /*
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 14c5613b4388..afbf50d0c08f 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -509,23 +509,25 @@ static void qxl_primary_atomic_update(struct drm_plane *plane,
509 .y2 = qfb->base.height 509 .y2 = qfb->base.height
510 }; 510 };
511 511
512 if (!old_state->fb) { 512 if (old_state->fb) {
513 qxl_io_log(qdev, 513 qfb_old = to_qxl_framebuffer(old_state->fb);
514 "create primary fb: %dx%d,%d,%d\n", 514 bo_old = gem_to_qxl_bo(qfb_old->obj);
515 bo->surf.width, bo->surf.height, 515 } else {
516 bo->surf.stride, bo->surf.format); 516 bo_old = NULL;
517 }
517 518
518 qxl_io_create_primary(qdev, 0, bo); 519 if (bo == bo_old)
519 bo->is_primary = true;
520 return; 520 return;
521 521
522 } else { 522 if (bo_old && bo_old->is_primary) {
523 qfb_old = to_qxl_framebuffer(old_state->fb); 523 qxl_io_destroy_primary(qdev);
524 bo_old = gem_to_qxl_bo(qfb_old->obj);
525 bo_old->is_primary = false; 524 bo_old->is_primary = false;
526 } 525 }
527 526
528 bo->is_primary = true; 527 if (!bo->is_primary) {
528 qxl_io_create_primary(qdev, 0, bo);
529 bo->is_primary = true;
530 }
529 qxl_draw_dirty_fb(qdev, qfb, bo, 0, 0, &norect, 1, 1); 531 qxl_draw_dirty_fb(qdev, qfb, bo, 0, 0, &norect, 1, 1);
530} 532}
531 533
@@ -534,13 +536,15 @@ static void qxl_primary_atomic_disable(struct drm_plane *plane,
534{ 536{
535 struct qxl_device *qdev = plane->dev->dev_private; 537 struct qxl_device *qdev = plane->dev->dev_private;
536 538
537 if (old_state->fb) 539 if (old_state->fb) {
538 { struct qxl_framebuffer *qfb = 540 struct qxl_framebuffer *qfb =
539 to_qxl_framebuffer(old_state->fb); 541 to_qxl_framebuffer(old_state->fb);
540 struct qxl_bo *bo = gem_to_qxl_bo(qfb->obj); 542 struct qxl_bo *bo = gem_to_qxl_bo(qfb->obj);
541 543
542 qxl_io_destroy_primary(qdev); 544 if (bo->is_primary) {
543 bo->is_primary = false; 545 qxl_io_destroy_primary(qdev);
546 bo->is_primary = false;
547 }
544 } 548 }
545} 549}
546 550
@@ -698,14 +702,15 @@ static void qxl_plane_cleanup_fb(struct drm_plane *plane,
698 struct drm_gem_object *obj; 702 struct drm_gem_object *obj;
699 struct qxl_bo *user_bo; 703 struct qxl_bo *user_bo;
700 704
701 if (!plane->state->fb) { 705 if (!old_state->fb) {
702 /* we never executed prepare_fb, so there's nothing to 706 /*
707 * we never executed prepare_fb, so there's nothing to
703 * unpin. 708 * unpin.
704 */ 709 */
705 return; 710 return;
706 } 711 }
707 712
708 obj = to_qxl_framebuffer(plane->state->fb)->obj; 713 obj = to_qxl_framebuffer(old_state->fb)->obj;
709 user_bo = gem_to_qxl_bo(obj); 714 user_bo = gem_to_qxl_bo(obj);
710 qxl_bo_unpin(user_bo); 715 qxl_bo_unpin(user_bo);
711} 716}
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 997131d58c7f..ffc10cadcf34 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1663,7 +1663,7 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend,
1663 radeon_agp_suspend(rdev); 1663 radeon_agp_suspend(rdev);
1664 1664
1665 pci_save_state(dev->pdev); 1665 pci_save_state(dev->pdev);
1666 if (freeze && rdev->family >= CHIP_CEDAR) { 1666 if (freeze && rdev->family >= CHIP_CEDAR && !(rdev->flags & RADEON_IS_IGP)) {
1667 rdev->asic->asic_reset(rdev, true); 1667 rdev->asic->asic_reset(rdev, true);
1668 pci_restore_state(dev->pdev); 1668 pci_restore_state(dev->pdev);
1669 } else if (suspend) { 1669 } else if (suspend) {
diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig
index 06f05302ee75..882d85db9053 100644
--- a/drivers/gpu/drm/sun4i/Kconfig
+++ b/drivers/gpu/drm/sun4i/Kconfig
@@ -26,7 +26,7 @@ config DRM_SUN4I_HDMI_CEC
26 bool "Allwinner A10 HDMI CEC Support" 26 bool "Allwinner A10 HDMI CEC Support"
27 depends on DRM_SUN4I_HDMI 27 depends on DRM_SUN4I_HDMI
28 select CEC_CORE 28 select CEC_CORE
29 depends on CEC_PIN 29 select CEC_PIN
30 help 30 help
31 Choose this option if you have an Allwinner SoC with an HDMI 31 Choose this option if you have an Allwinner SoC with an HDMI
32 controller and want to use CEC. 32 controller and want to use CEC.
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi.h b/drivers/gpu/drm/sun4i/sun4i_hdmi.h
index 1457750988da..a1f8cba251a2 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi.h
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi.h
@@ -15,7 +15,7 @@
15#include <drm/drm_connector.h> 15#include <drm/drm_connector.h>
16#include <drm/drm_encoder.h> 16#include <drm/drm_encoder.h>
17 17
18#include <media/cec.h> 18#include <media/cec-pin.h>
19 19
20#define SUN4I_HDMI_CTRL_REG 0x004 20#define SUN4I_HDMI_CTRL_REG 0x004
21#define SUN4I_HDMI_CTRL_ENABLE BIT(31) 21#define SUN4I_HDMI_CTRL_ENABLE BIT(31)
diff --git a/drivers/gpu/drm/tegra/trace.h b/drivers/gpu/drm/tegra/trace.h
index e9b7cdad5c4c..5a1ab4046e92 100644
--- a/drivers/gpu/drm/tegra/trace.h
+++ b/drivers/gpu/drm/tegra/trace.h
@@ -63,6 +63,6 @@ DEFINE_EVENT(register_access, sor_readl,
63 63
64/* This part must be outside protection */ 64/* This part must be outside protection */
65#undef TRACE_INCLUDE_PATH 65#undef TRACE_INCLUDE_PATH
66#define TRACE_INCLUDE_PATH . 66#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/tegra
67#define TRACE_INCLUDE_FILE trace 67#define TRACE_INCLUDE_FILE trace
68#include <trace/define_trace.h> 68#include <trace/define_trace.h>
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 70ad19c4c73e..88bdafb297f5 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -432,8 +432,10 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
432 atomic_set(&qp->qp_sec->error_list_count, 0); 432 atomic_set(&qp->qp_sec->error_list_count, 0);
433 init_completion(&qp->qp_sec->error_complete); 433 init_completion(&qp->qp_sec->error_complete);
434 ret = security_ib_alloc_security(&qp->qp_sec->security); 434 ret = security_ib_alloc_security(&qp->qp_sec->security);
435 if (ret) 435 if (ret) {
436 kfree(qp->qp_sec); 436 kfree(qp->qp_sec);
437 qp->qp_sec = NULL;
438 }
437 439
438 return ret; 440 return ret;
439} 441}
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 4ab30d832ac5..52a2cf2d83aa 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3869,15 +3869,15 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
3869 resp.raw_packet_caps = attr.raw_packet_caps; 3869 resp.raw_packet_caps = attr.raw_packet_caps;
3870 resp.response_length += sizeof(resp.raw_packet_caps); 3870 resp.response_length += sizeof(resp.raw_packet_caps);
3871 3871
3872 if (ucore->outlen < resp.response_length + sizeof(resp.xrq_caps)) 3872 if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps))
3873 goto end; 3873 goto end;
3874 3874
3875 resp.xrq_caps.max_rndv_hdr_size = attr.xrq_caps.max_rndv_hdr_size; 3875 resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size;
3876 resp.xrq_caps.max_num_tags = attr.xrq_caps.max_num_tags; 3876 resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags;
3877 resp.xrq_caps.max_ops = attr.xrq_caps.max_ops; 3877 resp.tm_caps.max_ops = attr.tm_caps.max_ops;
3878 resp.xrq_caps.max_sge = attr.xrq_caps.max_sge; 3878 resp.tm_caps.max_sge = attr.tm_caps.max_sge;
3879 resp.xrq_caps.flags = attr.xrq_caps.flags; 3879 resp.tm_caps.flags = attr.tm_caps.flags;
3880 resp.response_length += sizeof(resp.xrq_caps); 3880 resp.response_length += sizeof(resp.tm_caps);
3881end: 3881end:
3882 err = ib_copy_to_udata(ucore, &resp, resp.response_length); 3882 err = ib_copy_to_udata(ucore, &resp, resp.response_length);
3883 return err; 3883 return err;
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index b2ed4b9cda6e..0be42787759f 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1066,6 +1066,8 @@ static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
1066static int thermal_init(struct hfi1_devdata *dd); 1066static int thermal_init(struct hfi1_devdata *dd);
1067 1067
1068static void update_statusp(struct hfi1_pportdata *ppd, u32 state); 1068static void update_statusp(struct hfi1_pportdata *ppd, u32 state);
1069static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
1070 int msecs);
1069static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, 1071static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
1070 int msecs); 1072 int msecs);
1071static void log_state_transition(struct hfi1_pportdata *ppd, u32 state); 1073static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
@@ -8238,6 +8240,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
8238 u64 regs[CCE_NUM_INT_CSRS]; 8240 u64 regs[CCE_NUM_INT_CSRS];
8239 u32 bit; 8241 u32 bit;
8240 int i; 8242 int i;
8243 irqreturn_t handled = IRQ_NONE;
8241 8244
8242 this_cpu_inc(*dd->int_counter); 8245 this_cpu_inc(*dd->int_counter);
8243 8246
@@ -8258,9 +8261,10 @@ static irqreturn_t general_interrupt(int irq, void *data)
8258 for_each_set_bit(bit, (unsigned long *)&regs[0], 8261 for_each_set_bit(bit, (unsigned long *)&regs[0],
8259 CCE_NUM_INT_CSRS * 64) { 8262 CCE_NUM_INT_CSRS * 64) {
8260 is_interrupt(dd, bit); 8263 is_interrupt(dd, bit);
8264 handled = IRQ_HANDLED;
8261 } 8265 }
8262 8266
8263 return IRQ_HANDLED; 8267 return handled;
8264} 8268}
8265 8269
8266static irqreturn_t sdma_interrupt(int irq, void *data) 8270static irqreturn_t sdma_interrupt(int irq, void *data)
@@ -9413,7 +9417,7 @@ static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable)
9413 write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask); 9417 write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
9414} 9418}
9415 9419
9416void reset_qsfp(struct hfi1_pportdata *ppd) 9420int reset_qsfp(struct hfi1_pportdata *ppd)
9417{ 9421{
9418 struct hfi1_devdata *dd = ppd->dd; 9422 struct hfi1_devdata *dd = ppd->dd;
9419 u64 mask, qsfp_mask; 9423 u64 mask, qsfp_mask;
@@ -9443,6 +9447,13 @@ void reset_qsfp(struct hfi1_pportdata *ppd)
9443 * for alarms and warnings 9447 * for alarms and warnings
9444 */ 9448 */
9445 set_qsfp_int_n(ppd, 1); 9449 set_qsfp_int_n(ppd, 1);
9450
9451 /*
9452 * After the reset, AOC transmitters are enabled by default. They need
9453 * to be turned off to complete the QSFP setup before they can be
9454 * enabled again.
9455 */
9456 return set_qsfp_tx(ppd, 0);
9446} 9457}
9447 9458
9448static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, 9459static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
@@ -10305,6 +10316,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
10305{ 10316{
10306 struct hfi1_devdata *dd = ppd->dd; 10317 struct hfi1_devdata *dd = ppd->dd;
10307 u32 previous_state; 10318 u32 previous_state;
10319 int offline_state_ret;
10308 int ret; 10320 int ret;
10309 10321
10310 update_lcb_cache(dd); 10322 update_lcb_cache(dd);
@@ -10326,28 +10338,11 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
10326 ppd->offline_disabled_reason = 10338 ppd->offline_disabled_reason =
10327 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT); 10339 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
10328 10340
10329 /* 10341 offline_state_ret = wait_phys_link_offline_substates(ppd, 10000);
10330 * Wait for offline transition. It can take a while for 10342 if (offline_state_ret < 0)
10331 * the link to go down. 10343 return offline_state_ret;
10332 */
10333 ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
10334 if (ret < 0)
10335 return ret;
10336
10337 /*
10338 * Now in charge of LCB - must be after the physical state is
10339 * offline.quiet and before host_link_state is changed.
10340 */
10341 set_host_lcb_access(dd);
10342 write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
10343
10344 /* make sure the logical state is also down */
10345 ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
10346 if (ret)
10347 force_logical_link_state_down(ppd);
10348
10349 ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
10350 10344
10345 /* Disabling AOC transmitters */
10351 if (ppd->port_type == PORT_TYPE_QSFP && 10346 if (ppd->port_type == PORT_TYPE_QSFP &&
10352 ppd->qsfp_info.limiting_active && 10347 ppd->qsfp_info.limiting_active &&
10353 qsfp_mod_present(ppd)) { 10348 qsfp_mod_present(ppd)) {
@@ -10365,6 +10360,30 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
10365 } 10360 }
10366 10361
10367 /* 10362 /*
10363 * Wait for the offline.Quiet transition if it hasn't happened yet. It
10364 * can take a while for the link to go down.
10365 */
10366 if (offline_state_ret != PLS_OFFLINE_QUIET) {
10367 ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 30000);
10368 if (ret < 0)
10369 return ret;
10370 }
10371
10372 /*
10373 * Now in charge of LCB - must be after the physical state is
10374 * offline.quiet and before host_link_state is changed.
10375 */
10376 set_host_lcb_access(dd);
10377 write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
10378
10379 /* make sure the logical state is also down */
10380 ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
10381 if (ret)
10382 force_logical_link_state_down(ppd);
10383
10384 ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
10385
10386 /*
10368 * The LNI has a mandatory wait time after the physical state 10387 * The LNI has a mandatory wait time after the physical state
10369 * moves to Offline.Quiet. The wait time may be different 10388 * moves to Offline.Quiet. The wait time may be different
10370 * depending on how the link went down. The 8051 firmware 10389 * depending on how the link went down. The 8051 firmware
@@ -10396,6 +10415,9 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
10396 & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) { 10415 & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
10397 /* went down while attempting link up */ 10416 /* went down while attempting link up */
10398 check_lni_states(ppd); 10417 check_lni_states(ppd);
10418
10419 /* The QSFP doesn't need to be reset on LNI failure */
10420 ppd->qsfp_info.reset_needed = 0;
10399 } 10421 }
10400 10422
10401 /* the active link width (downgrade) is 0 on link down */ 10423 /* the active link width (downgrade) is 0 on link down */
@@ -12804,6 +12826,39 @@ static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
12804 return 0; 12826 return 0;
12805} 12827}
12806 12828
12829/*
12830 * wait_phys_link_offline_quiet_substates - wait for any offline substate
12831 * @ppd: port device
12832 * @msecs: the number of milliseconds to wait
12833 *
12834 * Wait up to msecs milliseconds for any offline physical link
12835 * state change to occur.
12836 * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT.
12837 */
12838static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
12839 int msecs)
12840{
12841 u32 read_state;
12842 unsigned long timeout;
12843
12844 timeout = jiffies + msecs_to_jiffies(msecs);
12845 while (1) {
12846 read_state = read_physical_state(ppd->dd);
12847 if ((read_state & 0xF0) == PLS_OFFLINE)
12848 break;
12849 if (time_after(jiffies, timeout)) {
12850 dd_dev_err(ppd->dd,
12851 "timeout waiting for phy link offline.quiet substates. Read state 0x%x, %dms\n",
12852 read_state, msecs);
12853 return -ETIMEDOUT;
12854 }
12855 usleep_range(1950, 2050); /* sleep 2ms-ish */
12856 }
12857
12858 log_state_transition(ppd, read_state);
12859 return read_state;
12860}
12861
12807#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ 12862#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
12808(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) 12863(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
12809 12864
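The new wait_phys_link_offline_substates() above is the usual bounded-polling idiom: compute a deadline once, re-read the physical state roughly every 2 ms, and stop as soon as the high nibble matches PLS_OFFLINE (any Offline.* substate) or the deadline passes, returning the substate actually observed so goto_offline() can skip the longer wait when Offline.Quiet was already reached. A user-space sketch of the same loop shape, assuming a fake read_state() and the 0x9x substate encoding shown in chip.h:

    #include <errno.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>

    #define PLS_OFFLINE 0x90   /* offline substates share the 0x9x high nibble */

    static unsigned int read_state(void)   /* stand-in for the CSR read */
    {
        static int calls;
        return ++calls < 5 ? 0x20 /* still polling */ : 0x95 /* an offline substate */;
    }

    static int wait_offline_substate(int msecs)
    {
        struct timespec now, deadline;

        clock_gettime(CLOCK_MONOTONIC, &deadline);
        deadline.tv_sec  += msecs / 1000;
        deadline.tv_nsec += (msecs % 1000) * 1000000L;
        if (deadline.tv_nsec >= 1000000000L) {
            deadline.tv_sec++;
            deadline.tv_nsec -= 1000000000L;
        }

        for (;;) {
            unsigned int state = read_state();

            if ((state & 0xF0) == PLS_OFFLINE)
                return (int)state;          /* report which substate was reached */

            clock_gettime(CLOCK_MONOTONIC, &now);
            if (now.tv_sec > deadline.tv_sec ||
                (now.tv_sec == deadline.tv_sec && now.tv_nsec >= deadline.tv_nsec))
                return -ETIMEDOUT;

            usleep(2000);                   /* ~2 ms between reads */
        }
    }

    int main(void)
    {
        int ret = wait_offline_substate(10000);

        if (ret < 0)
            printf("timed out\n");
        else
            printf("reached offline substate %#x\n", ret);
        return 0;
    }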
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index b8345a60a0fb..50b8645d0b87 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -204,6 +204,7 @@
204#define PLS_OFFLINE_READY_TO_QUIET_LT 0x92 204#define PLS_OFFLINE_READY_TO_QUIET_LT 0x92
205#define PLS_OFFLINE_REPORT_FAILURE 0x93 205#define PLS_OFFLINE_REPORT_FAILURE 0x93
206#define PLS_OFFLINE_READY_TO_QUIET_BCC 0x94 206#define PLS_OFFLINE_READY_TO_QUIET_BCC 0x94
207#define PLS_OFFLINE_QUIET_DURATION 0x95
207#define PLS_POLLING 0x20 208#define PLS_POLLING 0x20
208#define PLS_POLLING_QUIET 0x20 209#define PLS_POLLING_QUIET 0x20
209#define PLS_POLLING_ACTIVE 0x21 210#define PLS_POLLING_ACTIVE 0x21
@@ -722,7 +723,7 @@ void handle_link_downgrade(struct work_struct *work);
722void handle_link_bounce(struct work_struct *work); 723void handle_link_bounce(struct work_struct *work);
723void handle_start_link(struct work_struct *work); 724void handle_start_link(struct work_struct *work);
724void handle_sma_message(struct work_struct *work); 725void handle_sma_message(struct work_struct *work);
725void reset_qsfp(struct hfi1_pportdata *ppd); 726int reset_qsfp(struct hfi1_pportdata *ppd);
726void qsfp_event(struct work_struct *work); 727void qsfp_event(struct work_struct *work);
727void start_freeze_handling(struct hfi1_pportdata *ppd, int flags); 728void start_freeze_handling(struct hfi1_pportdata *ppd, int flags);
728int send_idle_sma(struct hfi1_devdata *dd, u64 message); 729int send_idle_sma(struct hfi1_devdata *dd, u64 message);
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index d46b17107901..1613af1c58d9 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -204,7 +204,10 @@ done_asic:
204 return ret; 204 return ret;
205} 205}
206 206
207/* magic character sequence that trails an image */ 207/* magic character sequence that begins an image */
208#define IMAGE_START_MAGIC "APO="
209
210/* magic character sequence that might trail an image */
208#define IMAGE_TRAIL_MAGIC "egamiAPO" 211#define IMAGE_TRAIL_MAGIC "egamiAPO"
209 212
210/* EPROM file types */ 213/* EPROM file types */
@@ -250,6 +253,7 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
250{ 253{
251 void *buffer; 254 void *buffer;
252 void *p; 255 void *p;
256 u32 length;
253 int ret; 257 int ret;
254 258
255 buffer = kmalloc(P1_SIZE, GFP_KERNEL); 259 buffer = kmalloc(P1_SIZE, GFP_KERNEL);
@@ -262,15 +266,21 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
262 return ret; 266 return ret;
263 } 267 }
264 268
265 /* scan for image magic that may trail the actual data */ 269 /* config partition is valid only if it starts with IMAGE_START_MAGIC */
266 p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE); 270 if (memcmp(buffer, IMAGE_START_MAGIC, strlen(IMAGE_START_MAGIC))) {
267 if (!p) {
268 kfree(buffer); 271 kfree(buffer);
269 return -ENOENT; 272 return -ENOENT;
270 } 273 }
271 274
275 /* scan for image magic that may trail the actual data */
276 p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
277 if (p)
278 length = p - buffer;
279 else
280 length = P1_SIZE;
281
272 *data = buffer; 282 *data = buffer;
273 *size = p - buffer; 283 *size = length;
274 return 0; 284 return 0;
275} 285}
276 286
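The reworked read_partition_platform_config() above only accepts the partition when it begins with the "APO=" start magic, then sizes the data either up to the optional "egamiAPO" trailer or, when no trailer is found, to the full P1_SIZE. A user-space sketch of that framing, assuming a toy P1_SIZE and replacing the kernel's strnstr() with a small bounded search:

    #include <stdio.h>
    #include <string.h>

    #define P1_SIZE            64          /* illustrative; the real partition is larger */
    #define IMAGE_START_MAGIC  "APO="
    #define IMAGE_TRAIL_MAGIC  "egamiAPO"

    /* bounded substring search, roughly what strnstr() does in the kernel */
    static const char *find_magic(const char *buf, size_t len, const char *magic)
    {
        size_t mlen = strlen(magic);

        for (size_t i = 0; i + mlen <= len; i++)
            if (!memcmp(buf + i, magic, mlen))
                return buf + i;
        return NULL;
    }

    static long config_length(const char *buf)
    {
        const char *trail;

        if (memcmp(buf, IMAGE_START_MAGIC, strlen(IMAGE_START_MAGIC)))
            return -1;                              /* not a config partition */

        trail = find_magic(buf, P1_SIZE, IMAGE_TRAIL_MAGIC);
        return trail ? trail - buf : P1_SIZE;       /* data ends at trailer, if any */
    }

    int main(void)
    {
        char part[P1_SIZE] = "APO=some-config-words egamiAPO";

        printf("config length: %ld bytes\n", config_length(part));
        return 0;
    }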
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 2bc89260235a..d9a1e9893136 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -930,15 +930,8 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
930 switch (ret) { 930 switch (ret) {
931 case 0: 931 case 0:
932 ret = setup_base_ctxt(fd, uctxt); 932 ret = setup_base_ctxt(fd, uctxt);
933 if (uctxt->subctxt_cnt) { 933 if (ret)
934 /* 934 deallocate_ctxt(uctxt);
935 * Base context is done (successfully or not), notify
936 * anybody using a sub-context that is waiting for
937 * this completion.
938 */
939 clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
940 wake_up(&uctxt->wait);
941 }
942 break; 935 break;
943 case 1: 936 case 1:
944 ret = complete_subctxt(fd); 937 ret = complete_subctxt(fd);
@@ -1305,25 +1298,25 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
1305 /* Now allocate the RcvHdr queue and eager buffers. */ 1298 /* Now allocate the RcvHdr queue and eager buffers. */
1306 ret = hfi1_create_rcvhdrq(dd, uctxt); 1299 ret = hfi1_create_rcvhdrq(dd, uctxt);
1307 if (ret) 1300 if (ret)
1308 return ret; 1301 goto done;
1309 1302
1310 ret = hfi1_setup_eagerbufs(uctxt); 1303 ret = hfi1_setup_eagerbufs(uctxt);
1311 if (ret) 1304 if (ret)
1312 goto setup_failed; 1305 goto done;
1313 1306
1314 /* If sub-contexts are enabled, do the appropriate setup */ 1307 /* If sub-contexts are enabled, do the appropriate setup */
1315 if (uctxt->subctxt_cnt) 1308 if (uctxt->subctxt_cnt)
1316 ret = setup_subctxt(uctxt); 1309 ret = setup_subctxt(uctxt);
1317 if (ret) 1310 if (ret)
1318 goto setup_failed; 1311 goto done;
1319 1312
1320 ret = hfi1_alloc_ctxt_rcv_groups(uctxt); 1313 ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
1321 if (ret) 1314 if (ret)
1322 goto setup_failed; 1315 goto done;
1323 1316
1324 ret = init_user_ctxt(fd, uctxt); 1317 ret = init_user_ctxt(fd, uctxt);
1325 if (ret) 1318 if (ret)
1326 goto setup_failed; 1319 goto done;
1327 1320
1328 user_init(uctxt); 1321 user_init(uctxt);
1329 1322
@@ -1331,12 +1324,22 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
1331 fd->uctxt = uctxt; 1324 fd->uctxt = uctxt;
1332 hfi1_rcd_get(uctxt); 1325 hfi1_rcd_get(uctxt);
1333 1326
1334 return 0; 1327done:
1328 if (uctxt->subctxt_cnt) {
1329 /*
1330 * On error, set the failed bit so sub-contexts will clean up
1331 * correctly.
1332 */
1333 if (ret)
1334 set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
1335 1335
1336setup_failed: 1336 /*
1337 /* Set the failed bit so sub-context init can do the right thing */ 1337 * Base context is done (successfully or not), notify anybody
1338 set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags); 1338 * using a sub-context that is waiting for this completion.
1339 deallocate_ctxt(uctxt); 1339 */
1340 clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
1341 wake_up(&uctxt->wait);
1342 }
1340 1343
1341 return ret; 1344 return ret;
1342} 1345}
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 82447b7cdda1..09e50fd2a08f 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -68,7 +68,7 @@
68/* 68/*
69 * Code to adjust PCIe capabilities. 69 * Code to adjust PCIe capabilities.
70 */ 70 */
71static int tune_pcie_caps(struct hfi1_devdata *); 71static void tune_pcie_caps(struct hfi1_devdata *);
72 72
73/* 73/*
74 * Do all the common PCIe setup and initialization. 74 * Do all the common PCIe setup and initialization.
@@ -351,7 +351,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
351 */ 351 */
352int request_msix(struct hfi1_devdata *dd, u32 msireq) 352int request_msix(struct hfi1_devdata *dd, u32 msireq)
353{ 353{
354 int nvec, ret; 354 int nvec;
355 355
356 nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, 356 nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq,
357 PCI_IRQ_MSIX | PCI_IRQ_LEGACY); 357 PCI_IRQ_MSIX | PCI_IRQ_LEGACY);
@@ -360,12 +360,7 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq)
360 return nvec; 360 return nvec;
361 } 361 }
362 362
363 ret = tune_pcie_caps(dd); 363 tune_pcie_caps(dd);
364 if (ret) {
365 dd_dev_err(dd, "tune_pcie_caps() failed: %d\n", ret);
366 pci_free_irq_vectors(dd->pcidev);
367 return ret;
368 }
369 364
370 /* check for legacy IRQ */ 365 /* check for legacy IRQ */
371 if (nvec == 1 && !dd->pcidev->msix_enabled) 366 if (nvec == 1 && !dd->pcidev->msix_enabled)
@@ -502,7 +497,7 @@ uint aspm_mode = ASPM_MODE_DISABLED;
502module_param_named(aspm, aspm_mode, uint, S_IRUGO); 497module_param_named(aspm, aspm_mode, uint, S_IRUGO);
503MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); 498MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
504 499
505static int tune_pcie_caps(struct hfi1_devdata *dd) 500static void tune_pcie_caps(struct hfi1_devdata *dd)
506{ 501{
507 struct pci_dev *parent; 502 struct pci_dev *parent;
508 u16 rc_mpss, rc_mps, ep_mpss, ep_mps; 503 u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
@@ -513,22 +508,14 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
513 * Turn on extended tags in DevCtl in case the BIOS has turned it off 508 * Turn on extended tags in DevCtl in case the BIOS has turned it off
514 * to improve WFR SDMA bandwidth 509 * to improve WFR SDMA bandwidth
515 */ 510 */
516 ret = pcie_capability_read_word(dd->pcidev, 511 ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
517 PCI_EXP_DEVCTL, &ectl); 512 if ((!ret) && !(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
518 if (ret) {
519 dd_dev_err(dd, "Unable to read from PCI config\n");
520 return ret;
521 }
522
523 if (!(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
524 dd_dev_info(dd, "Enabling PCIe extended tags\n"); 513 dd_dev_info(dd, "Enabling PCIe extended tags\n");
525 ectl |= PCI_EXP_DEVCTL_EXT_TAG; 514 ectl |= PCI_EXP_DEVCTL_EXT_TAG;
526 ret = pcie_capability_write_word(dd->pcidev, 515 ret = pcie_capability_write_word(dd->pcidev,
527 PCI_EXP_DEVCTL, ectl); 516 PCI_EXP_DEVCTL, ectl);
528 if (ret) { 517 if (ret)
529 dd_dev_err(dd, "Unable to write to PCI config\n"); 518 dd_dev_info(dd, "Unable to write to PCI config\n");
530 return ret;
531 }
532 } 519 }
533 /* Find out supported and configured values for parent (root) */ 520 /* Find out supported and configured values for parent (root) */
534 parent = dd->pcidev->bus->self; 521 parent = dd->pcidev->bus->self;
@@ -536,15 +523,22 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
536 * The driver cannot perform the tuning if it does not have 523 * The driver cannot perform the tuning if it does not have
537 * access to the upstream component. 524 * access to the upstream component.
538 */ 525 */
539 if (!parent) 526 if (!parent) {
540 return -EINVAL; 527 dd_dev_info(dd, "Parent not found\n");
528 return;
529 }
541 if (!pci_is_root_bus(parent->bus)) { 530 if (!pci_is_root_bus(parent->bus)) {
542 dd_dev_info(dd, "Parent not root\n"); 531 dd_dev_info(dd, "Parent not root\n");
543 return -EINVAL; 532 return;
533 }
534 if (!pci_is_pcie(parent)) {
535 dd_dev_info(dd, "Parent is not PCI Express capable\n");
536 return;
537 }
538 if (!pci_is_pcie(dd->pcidev)) {
539 dd_dev_info(dd, "PCI device is not PCI Express capable\n");
540 return;
544 } 541 }
545
546 if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
547 return -EINVAL;
548 rc_mpss = parent->pcie_mpss; 542 rc_mpss = parent->pcie_mpss;
549 rc_mps = ffs(pcie_get_mps(parent)) - 8; 543 rc_mps = ffs(pcie_get_mps(parent)) - 8;
550 /* Find out supported and configured values for endpoint (us) */ 544 /* Find out supported and configured values for endpoint (us) */
@@ -590,8 +584,6 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
590 ep_mrrs = max_mrrs; 584 ep_mrrs = max_mrrs;
591 pcie_set_readrq(dd->pcidev, ep_mrrs); 585 pcie_set_readrq(dd->pcidev, ep_mrrs);
592 } 586 }
593
594 return 0;
595} 587}
596 588
597/* End of PCIe capability tuning */ 589/* End of PCIe capability tuning */
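The tuning code above (now best-effort and void) works on PCIe payload-size encodings rather than byte counts: pcie_get_mps() reports 128 << encoding bytes, so ffs(bytes) - 8 recovers the 0-5 encoding, and the smaller of the root-complex and endpoint values is what both sides can actually use. A quick stand-alone check of that conversion (the sample values are arbitrary):

    #include <stdio.h>
    #include <strings.h>   /* ffs() */

    static int mps_bytes_to_code(int bytes) { return ffs(bytes) - 8; }  /* 128 -> 0, 256 -> 1, ... */
    static int mps_code_to_bytes(int code)  { return 128 << code; }

    int main(void)
    {
        int rc_mps  = mps_bytes_to_code(512);   /* root complex currently set to 512 bytes */
        int ep_mpss = 2;                        /* endpoint supports up to code 2 = 512 bytes */
        int target  = rc_mps < ep_mpss ? rc_mps : ep_mpss;

        printf("negotiated MPS: code %d = %d bytes\n", target, mps_code_to_bytes(target));
        return 0;
    }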
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index a8af96d2b1b0..d486355880cb 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -790,7 +790,9 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
790 * reuse of stale settings established in our previous pass through. 790 * reuse of stale settings established in our previous pass through.
791 */ 791 */
792 if (ppd->qsfp_info.reset_needed) { 792 if (ppd->qsfp_info.reset_needed) {
793 reset_qsfp(ppd); 793 ret = reset_qsfp(ppd);
794 if (ret)
795 return ret;
794 refresh_qsfp_cache(ppd, &ppd->qsfp_info); 796 refresh_qsfp_cache(ppd, &ppd->qsfp_info);
795 } else { 797 } else {
796 ppd->qsfp_info.reset_needed = 1; 798 ppd->qsfp_info.reset_needed = 1;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 05fb4bdff6a0..d6fbad8f34aa 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -778,13 +778,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
778 } 778 }
779 779
780 if (MLX5_CAP_GEN(mdev, tag_matching)) { 780 if (MLX5_CAP_GEN(mdev, tag_matching)) {
781 props->xrq_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE; 781 props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
782 props->xrq_caps.max_num_tags = 782 props->tm_caps.max_num_tags =
783 (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1; 783 (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
784 props->xrq_caps.flags = IB_TM_CAP_RC; 784 props->tm_caps.flags = IB_TM_CAP_RC;
785 props->xrq_caps.max_ops = 785 props->tm_caps.max_ops =
786 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); 786 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
787 props->xrq_caps.max_sge = MLX5_TM_MAX_SGE; 787 props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
788 } 788 }
789 789
790 if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { 790 if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 914f212e7ef6..f3dbd75a0a96 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -50,13 +50,9 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
50{ 50{
51 unsigned long tmp; 51 unsigned long tmp;
52 unsigned long m; 52 unsigned long m;
53 int i, k; 53 u64 base = ~0, p = 0;
54 u64 base = 0; 54 u64 len, pfn;
55 int p = 0; 55 int i = 0;
56 int skip;
57 int mask;
58 u64 len;
59 u64 pfn;
60 struct scatterlist *sg; 56 struct scatterlist *sg;
61 int entry; 57 int entry;
62 unsigned long page_shift = umem->page_shift; 58 unsigned long page_shift = umem->page_shift;
@@ -76,33 +72,24 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
76 m = find_first_bit(&tmp, BITS_PER_LONG); 72 m = find_first_bit(&tmp, BITS_PER_LONG);
77 if (max_page_shift) 73 if (max_page_shift)
78 m = min_t(unsigned long, max_page_shift - page_shift, m); 74 m = min_t(unsigned long, max_page_shift - page_shift, m);
79 skip = 1 << m; 75
80 mask = skip - 1;
81 i = 0;
82 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { 76 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
83 len = sg_dma_len(sg) >> page_shift; 77 len = sg_dma_len(sg) >> page_shift;
84 pfn = sg_dma_address(sg) >> page_shift; 78 pfn = sg_dma_address(sg) >> page_shift;
85 for (k = 0; k < len; k++) { 79 if (base + p != pfn) {
86 if (!(i & mask)) { 80 /* If either the offset or the new
87 tmp = (unsigned long)pfn; 81 * base are unaligned update m
88 m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG)); 82 */
89 skip = 1 << m; 83 tmp = (unsigned long)(pfn | p);
90 mask = skip - 1; 84 if (!IS_ALIGNED(tmp, 1 << m))
91 base = pfn; 85 m = find_first_bit(&tmp, BITS_PER_LONG);
92 p = 0; 86
93 } else { 87 base = pfn;
94 if (base + p != pfn) { 88 p = 0;
95 tmp = (unsigned long)p;
96 m = find_first_bit(&tmp, BITS_PER_LONG);
97 skip = 1 << m;
98 mask = skip - 1;
99 base = pfn;
100 p = 0;
101 }
102 }
103 p++;
104 i++;
105 } 89 }
90
91 p += len;
92 i += len;
106 } 93 }
107 94
108 if (i) { 95 if (i) {
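An illustrative userspace sketch (assumptions: segments are given as pfn/length pairs; this is not the driver code) of the alignment rule the rewritten mlx5_ib_cont_pages() applies above: whenever the next DMA segment does not continue contiguously, the combined alignment of the new start pfn and the running offset caps the extra page shift m.

/*
 * Compute the largest extra page shift allowed by a list of DMA segments,
 * mirroring the base/p/IS_ALIGNED logic in the hunk above.
 */
#include <stdio.h>

struct seg { unsigned long pfn; unsigned long len; };

static unsigned long best_extra_shift(const struct seg *s, int n,
                                      unsigned long m_max)
{
        unsigned long m = m_max, base = ~0UL, p = 0, tmp;
        int i;

        for (i = 0; i < n; i++) {
                if (base + p != s[i].pfn) {
                        /* discontinuity: alignment of (pfn | offset) caps m */
                        tmp = s[i].pfn | p;
                        if (tmp & ((1UL << m) - 1))
                                m = __builtin_ctzl(tmp);
                        base = s[i].pfn;
                        p = 0;
                }
                p += s[i].len;
        }
        return m;
}

int main(void)
{
        struct seg segs[] = { { 0x1000, 16 }, { 0x2000, 16 }, { 0x2011, 1 } };

        printf("extra shift: %lu\n", best_extra_shift(segs, 3, 6));
        return 0;
}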
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 0e2789d9bb4d..37bbc543847a 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -47,7 +47,8 @@ enum {
47 47
48#define MLX5_UMR_ALIGN 2048 48#define MLX5_UMR_ALIGN 2048
49 49
50static int clean_mr(struct mlx5_ib_mr *mr); 50static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
51static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
51static int mr_cache_max_order(struct mlx5_ib_dev *dev); 52static int mr_cache_max_order(struct mlx5_ib_dev *dev);
52static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); 53static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
53 54
@@ -1270,8 +1271,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1270 1271
1271 err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, 1272 err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
1272 update_xlt_flags); 1273 update_xlt_flags);
1274
1273 if (err) { 1275 if (err) {
1274 mlx5_ib_dereg_mr(&mr->ibmr); 1276 dereg_mr(dev, mr);
1275 return ERR_PTR(err); 1277 return ERR_PTR(err);
1276 } 1278 }
1277 } 1279 }
@@ -1356,7 +1358,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1356 err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, 1358 err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
1357 &npages, &page_shift, &ncont, &order); 1359 &npages, &page_shift, &ncont, &order);
1358 if (err < 0) { 1360 if (err < 0) {
1359 clean_mr(mr); 1361 clean_mr(dev, mr);
1360 return err; 1362 return err;
1361 } 1363 }
1362 } 1364 }
@@ -1410,7 +1412,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1410 if (err) { 1412 if (err) {
1411 mlx5_ib_warn(dev, "Failed to rereg UMR\n"); 1413 mlx5_ib_warn(dev, "Failed to rereg UMR\n");
1412 ib_umem_release(mr->umem); 1414 ib_umem_release(mr->umem);
1413 clean_mr(mr); 1415 clean_mr(dev, mr);
1414 return err; 1416 return err;
1415 } 1417 }
1416 } 1418 }
@@ -1469,9 +1471,8 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1469 } 1471 }
1470} 1472}
1471 1473
1472static int clean_mr(struct mlx5_ib_mr *mr) 1474static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1473{ 1475{
1474 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1475 int allocated_from_cache = mr->allocated_from_cache; 1476 int allocated_from_cache = mr->allocated_from_cache;
1476 int err; 1477 int err;
1477 1478
@@ -1507,10 +1508,8 @@ static int clean_mr(struct mlx5_ib_mr *mr)
1507 return 0; 1508 return 0;
1508} 1509}
1509 1510
1510int mlx5_ib_dereg_mr(struct ib_mr *ibmr) 1511static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1511{ 1512{
1512 struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1513 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1514 int npages = mr->npages; 1513 int npages = mr->npages;
1515 struct ib_umem *umem = mr->umem; 1514 struct ib_umem *umem = mr->umem;
1516 1515
@@ -1539,7 +1538,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
1539 } 1538 }
1540#endif 1539#endif
1541 1540
1542 clean_mr(mr); 1541 clean_mr(dev, mr);
1543 1542
1544 if (umem) { 1543 if (umem) {
1545 ib_umem_release(umem); 1544 ib_umem_release(umem);
@@ -1549,6 +1548,14 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
1549 return 0; 1548 return 0;
1550} 1549}
1551 1550
1551int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
1552{
1553 struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1554 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1555
1556 return dereg_mr(dev, mr);
1557}
1558
1552struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, 1559struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1553 enum ib_mr_type mr_type, 1560 enum ib_mr_type mr_type,
1554 u32 max_num_sg) 1561 u32 max_num_sg)
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index f0dc5f4aa177..442b9bdc0f03 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -3232,7 +3232,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3232 mr->ibmr.iova); 3232 mr->ibmr.iova);
3233 set_wqe_32bit_value(wqe->wqe_words, 3233 set_wqe_32bit_value(wqe->wqe_words,
3234 NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, 3234 NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
3235 mr->ibmr.length); 3235 lower_32_bits(mr->ibmr.length));
3236 set_wqe_32bit_value(wqe->wqe_words, 3236 set_wqe_32bit_value(wqe->wqe_words,
3237 NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); 3237 NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
3238 set_wqe_32bit_value(wqe->wqe_words, 3238 set_wqe_32bit_value(wqe->wqe_words,
@@ -3274,7 +3274,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3274 mr->npages * 8); 3274 mr->npages * 8);
3275 3275
3276 nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, " 3276 nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, "
3277 "length: %d, rkey: %0x, pgl_paddr: %llx, " 3277 "length: %lld, rkey: %0x, pgl_paddr: %llx, "
3278 "page_list_len: %u, wqe_misc: %x\n", 3278 "page_list_len: %u, wqe_misc: %x\n",
3279 (unsigned long long) mr->ibmr.iova, 3279 (unsigned long long) mr->ibmr.iova,
3280 mr->ibmr.length, 3280 mr->ibmr.length,
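A small illustration (not driver code) of the split behind the lower_32_bits() change above: the 64-bit MR length has to be written into two 32-bit WQE words, so the low half must be masked explicitly; the driver itself keeps the high word at zero.

/* Split a 64-bit length into low/high 32-bit halves. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t len  = 0x180000000ULL;                 /* exceeds 32 bits */
        uint32_t low  = (uint32_t)(len & 0xffffffffULL);
        uint32_t high = (uint32_t)(len >> 32);

        printf("low=0x%08x high=0x%08x\n", low, high);
        return 0;
}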
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 2e075377242e..6cd61638b441 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1000,19 +1000,6 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
1000 */ 1000 */
1001 priv->dev->broadcast[8] = priv->pkey >> 8; 1001 priv->dev->broadcast[8] = priv->pkey >> 8;
1002 priv->dev->broadcast[9] = priv->pkey & 0xff; 1002 priv->dev->broadcast[9] = priv->pkey & 0xff;
1003
1004 /*
1005 * Update the broadcast address in the priv->broadcast object,
1006 * in case it already exists, otherwise no one will do that.
1007 */
1008 if (priv->broadcast) {
1009 spin_lock_irq(&priv->lock);
1010 memcpy(priv->broadcast->mcmember.mgid.raw,
1011 priv->dev->broadcast + 4,
1012 sizeof(union ib_gid));
1013 spin_unlock_irq(&priv->lock);
1014 }
1015
1016 return 0; 1003 return 0;
1017 } 1004 }
1018 1005
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index bac95b509a9b..dcc77014018d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2180,6 +2180,7 @@ static struct net_device *ipoib_add_port(const char *format,
2180{ 2180{
2181 struct ipoib_dev_priv *priv; 2181 struct ipoib_dev_priv *priv;
2182 struct ib_port_attr attr; 2182 struct ib_port_attr attr;
2183 struct rdma_netdev *rn;
2183 int result = -ENOMEM; 2184 int result = -ENOMEM;
2184 2185
2185 priv = ipoib_intf_alloc(hca, port, format); 2186 priv = ipoib_intf_alloc(hca, port, format);
@@ -2279,7 +2280,8 @@ register_failed:
2279 ipoib_dev_cleanup(priv->dev); 2280 ipoib_dev_cleanup(priv->dev);
2280 2281
2281device_init_failed: 2282device_init_failed:
2282 free_netdev(priv->dev); 2283 rn = netdev_priv(priv->dev);
2284 rn->free_rdma_netdev(priv->dev);
2283 kfree(priv); 2285 kfree(priv);
2284 2286
2285alloc_mem_failed: 2287alloc_mem_failed:
@@ -2328,7 +2330,7 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
2328 return; 2330 return;
2329 2331
2330 list_for_each_entry_safe(priv, tmp, dev_list, list) { 2332 list_for_each_entry_safe(priv, tmp, dev_list, list) {
2331 struct rdma_netdev *rn = netdev_priv(priv->dev); 2333 struct rdma_netdev *parent_rn = netdev_priv(priv->dev);
2332 2334
2333 ib_unregister_event_handler(&priv->event_handler); 2335 ib_unregister_event_handler(&priv->event_handler);
2334 flush_workqueue(ipoib_workqueue); 2336 flush_workqueue(ipoib_workqueue);
@@ -2350,10 +2352,15 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
2350 unregister_netdev(priv->dev); 2352 unregister_netdev(priv->dev);
2351 mutex_unlock(&priv->sysfs_mutex); 2353 mutex_unlock(&priv->sysfs_mutex);
2352 2354
2353 rn->free_rdma_netdev(priv->dev); 2355 parent_rn->free_rdma_netdev(priv->dev);
2356
2357 list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
2358 struct rdma_netdev *child_rn;
2354 2359
2355 list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) 2360 child_rn = netdev_priv(cpriv->dev);
2361 child_rn->free_rdma_netdev(cpriv->dev);
2356 kfree(cpriv); 2362 kfree(cpriv);
2363 }
2357 2364
2358 kfree(priv); 2365 kfree(priv);
2359 } 2366 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 9927cd6b7082..55a9b71ed05a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -141,14 +141,17 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
141 return restart_syscall(); 141 return restart_syscall();
142 } 142 }
143 143
144 priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); 144 if (!down_write_trylock(&ppriv->vlan_rwsem)) {
145 if (!priv) {
146 rtnl_unlock(); 145 rtnl_unlock();
147 mutex_unlock(&ppriv->sysfs_mutex); 146 mutex_unlock(&ppriv->sysfs_mutex);
148 return -ENOMEM; 147 return restart_syscall();
149 } 148 }
150 149
151 down_write(&ppriv->vlan_rwsem); 150 priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
151 if (!priv) {
152 result = -ENOMEM;
153 goto out;
154 }
152 155
153 /* 156 /*
154 * First ensure this isn't a duplicate. We check the parent device and 157 * First ensure this isn't a duplicate. We check the parent device and
@@ -175,8 +178,11 @@ out:
175 rtnl_unlock(); 178 rtnl_unlock();
176 mutex_unlock(&ppriv->sysfs_mutex); 179 mutex_unlock(&ppriv->sysfs_mutex);
177 180
178 if (result) { 181 if (result && priv) {
179 free_netdev(priv->dev); 182 struct rdma_netdev *rn;
183
184 rn = netdev_priv(priv->dev);
185 rn->free_rdma_netdev(priv->dev);
180 kfree(priv); 186 kfree(priv);
181 } 187 }
182 188
@@ -204,7 +210,12 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
204 return restart_syscall(); 210 return restart_syscall();
205 } 211 }
206 212
207 down_write(&ppriv->vlan_rwsem); 213 if (!down_write_trylock(&ppriv->vlan_rwsem)) {
214 rtnl_unlock();
215 mutex_unlock(&ppriv->sysfs_mutex);
216 return restart_syscall();
217 }
218
208 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { 219 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
209 if (priv->pkey == pkey && 220 if (priv->pkey == pkey &&
210 priv->child_type == IPOIB_LEGACY_CHILD) { 221 priv->child_type == IPOIB_LEGACY_CHILD) {
@@ -224,7 +235,10 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
224 mutex_unlock(&ppriv->sysfs_mutex); 235 mutex_unlock(&ppriv->sysfs_mutex);
225 236
226 if (dev) { 237 if (dev) {
227 free_netdev(dev); 238 struct rdma_netdev *rn;
239
240 rn = netdev_priv(dev);
241 rn->free_rdma_netdev(priv->dev);
228 kfree(priv); 242 kfree(priv);
229 return 0; 243 return 0;
230 } 244 }
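A rough userspace analogue (pthread-based, clearly not the kernel code) of the down_write_trylock()/restart_syscall() pattern added in ipoib_vlan_add() and ipoib_vlan_delete() above: if the inner lock cannot be taken while the outer ones are held, everything is released and the caller retries rather than blocking with locks held.

/* Trylock-and-restart to avoid blocking in an awkward lock order. */
#include <pthread.h>
#include <stdio.h>
#include <errno.h>

static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER;

static int do_op(void)
{
        pthread_mutex_lock(&outer);
        if (pthread_mutex_trylock(&inner) != 0) {
                pthread_mutex_unlock(&outer);
                return -EAGAIN;         /* caller retries, like restart_syscall() */
        }

        /* ... both locks held, do the real work here ... */

        pthread_mutex_unlock(&inner);
        pthread_mutex_unlock(&outer);
        return 0;
}

int main(void)
{
        while (do_op() == -EAGAIN)
                ;                       /* retry until both locks were taken */
        puts("done");
        return 0;
}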
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 9c3e9ab53a41..322209d5ff58 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -154,7 +154,7 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
154{ 154{
155 int i; 155 int i;
156 156
157 iser_err("page vec npages %d data length %d\n", 157 iser_err("page vec npages %d data length %lld\n",
158 page_vec->npages, page_vec->fake_mr.length); 158 page_vec->npages, page_vec->fake_mr.length);
159 for (i = 0; i < page_vec->npages; i++) 159 for (i = 0; i < page_vec->npages; i++)
160 iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]); 160 iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 382de42b8359..6fe2d0346073 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -874,7 +874,7 @@ static bool copy_device_table(void)
874 hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); 874 hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
875 entry = (((u64) hi) << 32) + lo; 875 entry = (((u64) hi) << 32) + lo;
876 if (last_entry && last_entry != entry) { 876 if (last_entry && last_entry != entry) {
877 pr_err("IOMMU:%d should use the same dev table as others!/n", 877 pr_err("IOMMU:%d should use the same dev table as others!\n",
878 iommu->index); 878 iommu->index);
879 return false; 879 return false;
880 } 880 }
@@ -882,7 +882,7 @@ static bool copy_device_table(void)
882 882
883 old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; 883 old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
884 if (old_devtb_size != dev_table_size) { 884 if (old_devtb_size != dev_table_size) {
885 pr_err("The device table size of IOMMU:%d is not expected!/n", 885 pr_err("The device table size of IOMMU:%d is not expected!\n",
886 iommu->index); 886 iommu->index);
887 return false; 887 return false;
888 } 888 }
@@ -890,7 +890,7 @@ static bool copy_device_table(void)
890 890
891 old_devtb_phys = entry & PAGE_MASK; 891 old_devtb_phys = entry & PAGE_MASK;
892 if (old_devtb_phys >= 0x100000000ULL) { 892 if (old_devtb_phys >= 0x100000000ULL) {
893 pr_err("The address of old device table is above 4G, not trustworthy!/n"); 893 pr_err("The address of old device table is above 4G, not trustworthy!\n");
894 return false; 894 return false;
895 } 895 }
896 old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB); 896 old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
@@ -901,7 +901,7 @@ static bool copy_device_table(void)
901 old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, 901 old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
902 get_order(dev_table_size)); 902 get_order(dev_table_size));
903 if (old_dev_tbl_cpy == NULL) { 903 if (old_dev_tbl_cpy == NULL) {
904 pr_err("Failed to allocate memory for copying old device table!/n"); 904 pr_err("Failed to allocate memory for copying old device table!\n");
905 return false; 905 return false;
906 } 906 }
907 907
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index d665d0dc16e8..6961fc393f0b 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -245,7 +245,7 @@ static void __arm_v7s_free_table(void *table, int lvl,
245static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries, 245static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
246 struct io_pgtable_cfg *cfg) 246 struct io_pgtable_cfg *cfg)
247{ 247{
248 if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) 248 if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)
249 return; 249 return;
250 250
251 dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep), 251 dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index bd515be5b380..16d33ac19db0 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -371,7 +371,8 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
371 int ret; 371 int ret;
372 372
373 spin_lock_irqsave(&dom->pgtlock, flags); 373 spin_lock_irqsave(&dom->pgtlock, flags);
374 ret = dom->iop->map(dom->iop, iova, paddr, size, prot); 374 ret = dom->iop->map(dom->iop, iova, paddr & DMA_BIT_MASK(32),
375 size, prot);
375 spin_unlock_irqrestore(&dom->pgtlock, flags); 376 spin_unlock_irqrestore(&dom->pgtlock, flags);
376 377
377 return ret; 378 return ret;
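A short sketch of the masking added in mtk_iommu_map() above: the hardware only consumes the low 32 bits of the physical address, and the kernel's DMA_BIT_MASK(n) expands to essentially the macro below.

/* Keep only the low 32 bits of a physical address before mapping. */
#include <stdio.h>
#include <stdint.h>

#define LOW_BITS(n)  (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

int main(void)
{
        uint64_t paddr = 0x140000000ULL;        /* above 4 GiB */

        printf("mapped address: 0x%llx\n",
               (unsigned long long)(paddr & LOW_BITS(32)));
        return 0;
}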
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 40159ac12ac8..c90976d7e53c 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -175,14 +175,13 @@ static void gic_mask_irq(struct irq_data *d)
175 175
176static void gic_unmask_irq(struct irq_data *d) 176static void gic_unmask_irq(struct irq_data *d)
177{ 177{
178 struct cpumask *affinity = irq_data_get_affinity_mask(d);
179 unsigned int intr = GIC_HWIRQ_TO_SHARED(d->hwirq); 178 unsigned int intr = GIC_HWIRQ_TO_SHARED(d->hwirq);
180 unsigned int cpu; 179 unsigned int cpu;
181 180
182 write_gic_smask(intr); 181 write_gic_smask(intr);
183 182
184 gic_clear_pcpu_masks(intr); 183 gic_clear_pcpu_masks(intr);
185 cpu = cpumask_first_and(affinity, cpu_online_mask); 184 cpu = cpumask_first(irq_data_get_effective_affinity_mask(d));
186 set_bit(intr, per_cpu_ptr(pcpu_masks, cpu)); 185 set_bit(intr, per_cpu_ptr(pcpu_masks, cpu));
187} 186}
188 187
@@ -420,13 +419,17 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
420 irq_hw_number_t hw, unsigned int cpu) 419 irq_hw_number_t hw, unsigned int cpu)
421{ 420{
422 int intr = GIC_HWIRQ_TO_SHARED(hw); 421 int intr = GIC_HWIRQ_TO_SHARED(hw);
422 struct irq_data *data;
423 unsigned long flags; 423 unsigned long flags;
424 424
425 data = irq_get_irq_data(virq);
426
425 spin_lock_irqsave(&gic_lock, flags); 427 spin_lock_irqsave(&gic_lock, flags);
426 write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin); 428 write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin);
427 write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu))); 429 write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu)));
428 gic_clear_pcpu_masks(intr); 430 gic_clear_pcpu_masks(intr);
429 set_bit(intr, per_cpu_ptr(pcpu_masks, cpu)); 431 set_bit(intr, per_cpu_ptr(pcpu_masks, cpu));
432 irq_data_update_effective_affinity(data, cpumask_of(cpu));
430 spin_unlock_irqrestore(&gic_lock, flags); 433 spin_unlock_irqrestore(&gic_lock, flags);
431 434
432 return 0; 435 return 0;
@@ -645,7 +648,7 @@ static int __init gic_of_init(struct device_node *node,
645 648
646 /* Find the first available CPU vector. */ 649 /* Find the first available CPU vector. */
647 i = 0; 650 i = 0;
648 reserved = (C_SW0 | C_SW1) >> __fls(C_SW0); 651 reserved = (C_SW0 | C_SW1) >> __ffs(C_SW0);
649 while (!of_property_read_u32_index(node, "mti,reserved-cpu-vectors", 652 while (!of_property_read_u32_index(node, "mti,reserved-cpu-vectors",
650 i++, &cpu_vec)) 653 i++, &cpu_vec))
651 reserved |= BIT(cpu_vec); 654 reserved |= BIT(cpu_vec);
@@ -684,11 +687,11 @@ static int __init gic_of_init(struct device_node *node,
684 687
685 gicconfig = read_gic_config(); 688 gicconfig = read_gic_config();
686 gic_shared_intrs = gicconfig & GIC_CONFIG_NUMINTERRUPTS; 689 gic_shared_intrs = gicconfig & GIC_CONFIG_NUMINTERRUPTS;
687 gic_shared_intrs >>= __fls(GIC_CONFIG_NUMINTERRUPTS); 690 gic_shared_intrs >>= __ffs(GIC_CONFIG_NUMINTERRUPTS);
688 gic_shared_intrs = (gic_shared_intrs + 1) * 8; 691 gic_shared_intrs = (gic_shared_intrs + 1) * 8;
689 692
690 gic_vpes = gicconfig & GIC_CONFIG_PVPS; 693 gic_vpes = gicconfig & GIC_CONFIG_PVPS;
691 gic_vpes >>= __fls(GIC_CONFIG_PVPS); 694 gic_vpes >>= __ffs(GIC_CONFIG_PVPS);
692 gic_vpes = gic_vpes + 1; 695 gic_vpes = gic_vpes + 1;
693 696
694 if (cpu_has_veic) { 697 if (cpu_has_veic) {
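A standalone sketch of the field extraction the __fls -> __ffs change above corrects: for a multi-bit register mask, the shift must be the index of the mask's lowest set bit (what __ffs returns), not its highest bit. The mask value below is only an example.

/* Extract a bitfield: shift by the position of the mask's lowest bit. */
#include <stdio.h>

#define NUMINTERRUPTS_MASK 0x00ff0000u          /* example multi-bit field */

static unsigned int field_get(unsigned int reg, unsigned int mask)
{
        return (reg & mask) >> __builtin_ctz(mask);     /* ctz acts as __ffs */
}

int main(void)
{
        unsigned int gicconfig = 0x003f0000u;   /* field value 0x3f */

        printf("num shared intrs: %u\n",
               (field_get(gicconfig, NUMINTERRUPTS_MASK) + 1) * 8);
        return 0;
}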
diff --git a/drivers/leds/leds-as3645a.c b/drivers/leds/leds-as3645a.c
index bbbbe0898233..9a257f969300 100644
--- a/drivers/leds/leds-as3645a.c
+++ b/drivers/leds/leds-as3645a.c
@@ -112,6 +112,10 @@
112#define AS_PEAK_mA_TO_REG(a) \ 112#define AS_PEAK_mA_TO_REG(a) \
113 ((min_t(u32, AS_PEAK_mA_MAX, a) - 1250) / 250) 113 ((min_t(u32, AS_PEAK_mA_MAX, a) - 1250) / 250)
114 114
115/* LED numbers for Devicetree */
116#define AS_LED_FLASH 0
117#define AS_LED_INDICATOR 1
118
115enum as_mode { 119enum as_mode {
116 AS_MODE_EXT_TORCH = 0 << AS_CONTROL_MODE_SETTING_SHIFT, 120 AS_MODE_EXT_TORCH = 0 << AS_CONTROL_MODE_SETTING_SHIFT,
117 AS_MODE_INDICATOR = 1 << AS_CONTROL_MODE_SETTING_SHIFT, 121 AS_MODE_INDICATOR = 1 << AS_CONTROL_MODE_SETTING_SHIFT,
@@ -491,10 +495,29 @@ static int as3645a_parse_node(struct as3645a *flash,
491 struct device_node *node) 495 struct device_node *node)
492{ 496{
493 struct as3645a_config *cfg = &flash->cfg; 497 struct as3645a_config *cfg = &flash->cfg;
498 struct device_node *child;
494 const char *name; 499 const char *name;
495 int rval; 500 int rval;
496 501
497 flash->flash_node = of_get_child_by_name(node, "flash"); 502 for_each_child_of_node(node, child) {
503 u32 id = 0;
504
505 of_property_read_u32(child, "reg", &id);
506
507 switch (id) {
508 case AS_LED_FLASH:
509 flash->flash_node = of_node_get(child);
510 break;
511 case AS_LED_INDICATOR:
512 flash->indicator_node = of_node_get(child);
513 break;
514 default:
515 dev_warn(&flash->client->dev,
516 "unknown LED %u encountered, ignoring\n", id);
517 break;
518 }
519 }
520
498 if (!flash->flash_node) { 521 if (!flash->flash_node) {
499 dev_err(&flash->client->dev, "can't find flash node\n"); 522 dev_err(&flash->client->dev, "can't find flash node\n");
500 return -ENODEV; 523 return -ENODEV;
@@ -534,11 +557,10 @@ static int as3645a_parse_node(struct as3645a *flash,
534 of_property_read_u32(flash->flash_node, "voltage-reference", 557 of_property_read_u32(flash->flash_node, "voltage-reference",
535 &cfg->voltage_reference); 558 &cfg->voltage_reference);
536 559
537 of_property_read_u32(flash->flash_node, "peak-current-limit", 560 of_property_read_u32(flash->flash_node, "ams,input-max-microamp",
538 &cfg->peak); 561 &cfg->peak);
539 cfg->peak = AS_PEAK_mA_TO_REG(cfg->peak); 562 cfg->peak = AS_PEAK_mA_TO_REG(cfg->peak);
540 563
541 flash->indicator_node = of_get_child_by_name(node, "indicator");
542 if (!flash->indicator_node) { 564 if (!flash->indicator_node) {
543 dev_warn(&flash->client->dev, 565 dev_warn(&flash->client->dev,
544 "can't find indicator node\n"); 566 "can't find indicator node\n");
@@ -721,6 +743,7 @@ static int as3645a_remove(struct i2c_client *client)
721 as3645a_set_control(flash, AS_MODE_EXT_TORCH, false); 743 as3645a_set_control(flash, AS_MODE_EXT_TORCH, false);
722 744
723 v4l2_flash_release(flash->vf); 745 v4l2_flash_release(flash->vf);
746 v4l2_flash_release(flash->vfind);
724 747
725 led_classdev_flash_unregister(&flash->fled); 748 led_classdev_flash_unregister(&flash->fled);
726 led_classdev_unregister(&flash->iled_cdev); 749 led_classdev_unregister(&flash->iled_cdev);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 5bfe285ea9d1..1ac58c5651b7 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3238,7 +3238,7 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
3238 if (unlikely(bio_end_sector(bio) > mddev->array_sectors)) 3238 if (unlikely(bio_end_sector(bio) > mddev->array_sectors))
3239 return DM_MAPIO_REQUEUE; 3239 return DM_MAPIO_REQUEUE;
3240 3240
3241 mddev->pers->make_request(mddev, bio); 3241 md_handle_request(mddev, bio);
3242 3242
3243 return DM_MAPIO_SUBMITTED; 3243 return DM_MAPIO_SUBMITTED;
3244} 3244}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 08fcaebc61bd..0ff1bbf6c90e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -266,6 +266,37 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
266 * call has finished, the bio has been linked into some internal structure 266 * call has finished, the bio has been linked into some internal structure
267 * and so is visible to ->quiesce(), so we don't need the refcount any more. 267 * and so is visible to ->quiesce(), so we don't need the refcount any more.
268 */ 268 */
269void md_handle_request(struct mddev *mddev, struct bio *bio)
270{
271check_suspended:
272 rcu_read_lock();
273 if (mddev->suspended) {
274 DEFINE_WAIT(__wait);
275 for (;;) {
276 prepare_to_wait(&mddev->sb_wait, &__wait,
277 TASK_UNINTERRUPTIBLE);
278 if (!mddev->suspended)
279 break;
280 rcu_read_unlock();
281 schedule();
282 rcu_read_lock();
283 }
284 finish_wait(&mddev->sb_wait, &__wait);
285 }
286 atomic_inc(&mddev->active_io);
287 rcu_read_unlock();
288
289 if (!mddev->pers->make_request(mddev, bio)) {
290 atomic_dec(&mddev->active_io);
291 wake_up(&mddev->sb_wait);
292 goto check_suspended;
293 }
294
295 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
296 wake_up(&mddev->sb_wait);
297}
298EXPORT_SYMBOL(md_handle_request);
299
269static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) 300static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
270{ 301{
271 const int rw = bio_data_dir(bio); 302 const int rw = bio_data_dir(bio);
@@ -285,23 +316,6 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
285 bio_endio(bio); 316 bio_endio(bio);
286 return BLK_QC_T_NONE; 317 return BLK_QC_T_NONE;
287 } 318 }
288check_suspended:
289 rcu_read_lock();
290 if (mddev->suspended) {
291 DEFINE_WAIT(__wait);
292 for (;;) {
293 prepare_to_wait(&mddev->sb_wait, &__wait,
294 TASK_UNINTERRUPTIBLE);
295 if (!mddev->suspended)
296 break;
297 rcu_read_unlock();
298 schedule();
299 rcu_read_lock();
300 }
301 finish_wait(&mddev->sb_wait, &__wait);
302 }
303 atomic_inc(&mddev->active_io);
304 rcu_read_unlock();
305 319
306 /* 320 /*
307 * save the sectors now since our bio can 321 * save the sectors now since our bio can
@@ -310,20 +324,14 @@ check_suspended:
310 sectors = bio_sectors(bio); 324 sectors = bio_sectors(bio);
311 /* bio could be mergeable after passing to underlayer */ 325 /* bio could be mergeable after passing to underlayer */
312 bio->bi_opf &= ~REQ_NOMERGE; 326 bio->bi_opf &= ~REQ_NOMERGE;
313 if (!mddev->pers->make_request(mddev, bio)) { 327
314 atomic_dec(&mddev->active_io); 328 md_handle_request(mddev, bio);
315 wake_up(&mddev->sb_wait);
316 goto check_suspended;
317 }
318 329
319 cpu = part_stat_lock(); 330 cpu = part_stat_lock();
320 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); 331 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
321 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors); 332 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
322 part_stat_unlock(); 333 part_stat_unlock();
323 334
324 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
325 wake_up(&mddev->sb_wait);
326
327 return BLK_QC_T_NONE; 335 return BLK_QC_T_NONE;
328} 336}
329 337
@@ -439,16 +447,22 @@ static void md_submit_flush_data(struct work_struct *ws)
439 struct mddev *mddev = container_of(ws, struct mddev, flush_work); 447 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
440 struct bio *bio = mddev->flush_bio; 448 struct bio *bio = mddev->flush_bio;
441 449
450 /*
451 * must reset flush_bio before calling into md_handle_request to avoid a
452 * deadlock, because other bios passed md_handle_request suspend check
453 * could wait for this and below md_handle_request could wait for those
454 * bios because of suspend check
455 */
456 mddev->flush_bio = NULL;
457 wake_up(&mddev->sb_wait);
458
442 if (bio->bi_iter.bi_size == 0) 459 if (bio->bi_iter.bi_size == 0)
443 /* an empty barrier - all done */ 460 /* an empty barrier - all done */
444 bio_endio(bio); 461 bio_endio(bio);
445 else { 462 else {
446 bio->bi_opf &= ~REQ_PREFLUSH; 463 bio->bi_opf &= ~REQ_PREFLUSH;
447 mddev->pers->make_request(mddev, bio); 464 md_handle_request(mddev, bio);
448 } 465 }
449
450 mddev->flush_bio = NULL;
451 wake_up(&mddev->sb_wait);
452} 466}
453 467
454void md_flush_request(struct mddev *mddev, struct bio *bio) 468void md_flush_request(struct mddev *mddev, struct bio *bio)
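A userspace sketch (illustration only; the kernel sleeps on sb_wait rather than spinning, and the counter is dropped by the completion path) of the gate that md_handle_request() factors out in the md.c hunks above: a bio only counts as in flight once the suspended check has passed, and a bio the personality refuses is un-counted and sent back through the gate.

/* Suspend gate with an in-flight counter and retry on refusal. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool suspended;             /* set while the array is suspended */
static atomic_int  active_io;

static bool personality_accepts(int bio)  /* stand-in for pers->make_request */
{
        return bio != 0;
}

static void handle_request(int bio)
{
check_suspended:
        while (atomic_load(&suspended))
                ;                          /* kernel: wait on sb_wait instead */
        atomic_fetch_add(&active_io, 1);

        if (!personality_accepts(bio)) {
                atomic_fetch_sub(&active_io, 1);
                goto check_suspended;      /* retry once accepted */
        }

        /* ... request submitted; completion would drop active_io ... */
        atomic_fetch_sub(&active_io, 1);
}

int main(void)
{
        handle_request(42);
        printf("active_io after completion: %d\n", atomic_load(&active_io));
        return 0;
}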
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 561d22b9a9a8..d8287d3cd1bf 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -692,6 +692,7 @@ extern void md_stop_writes(struct mddev *mddev);
692extern int md_rdev_init(struct md_rdev *rdev); 692extern int md_rdev_init(struct md_rdev *rdev);
693extern void md_rdev_clear(struct md_rdev *rdev); 693extern void md_rdev_clear(struct md_rdev *rdev);
694 694
695extern void md_handle_request(struct mddev *mddev, struct bio *bio);
695extern void mddev_suspend(struct mddev *mddev); 696extern void mddev_suspend(struct mddev *mddev);
696extern void mddev_resume(struct mddev *mddev); 697extern void mddev_resume(struct mddev *mddev);
697extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, 698extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 076409455b60..928e24a07133 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6575,14 +6575,17 @@ static ssize_t
6575raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) 6575raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
6576{ 6576{
6577 struct r5conf *conf; 6577 struct r5conf *conf;
6578 unsigned long new; 6578 unsigned int new;
6579 int err; 6579 int err;
6580 struct r5worker_group *new_groups, *old_groups; 6580 struct r5worker_group *new_groups, *old_groups;
6581 int group_cnt, worker_cnt_per_group; 6581 int group_cnt, worker_cnt_per_group;
6582 6582
6583 if (len >= PAGE_SIZE) 6583 if (len >= PAGE_SIZE)
6584 return -EINVAL; 6584 return -EINVAL;
6585 if (kstrtoul(page, 10, &new)) 6585 if (kstrtouint(page, 10, &new))
6586 return -EINVAL;
6587 /* 8192 should be big enough */
6588 if (new > 8192)
6586 return -EINVAL; 6589 return -EINVAL;
6587 6590
6588 err = mddev_lock(mddev); 6591 err = mddev_lock(mddev);
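A userspace sketch of the stricter sysfs parsing added in raid5_store_group_thread_cnt() above: parse the value as an unsigned int and reject anything above the 8192 ceiling the driver picks. The helper name and return convention are local to the sketch.

/* Bounded unsigned parse, roughly what kstrtouint + a range check give. */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

static int parse_group_cnt(const char *page, unsigned int *out)
{
        char *end;
        unsigned long val;

        errno = 0;
        val = strtoul(page, &end, 10);
        if (errno || end == page || val > 8192)
                return -EINVAL;
        *out = (unsigned int)val;
        return 0;
}

int main(void)
{
        unsigned int new_cnt;

        if (parse_group_cnt("4294967296", &new_cnt))    /* would wrap a u32 */
                puts("rejected");
        return 0;
}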
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index bbaddf18a1b3..d0ccc6729fd2 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -392,6 +392,7 @@ static const struct sdhci_pci_fixes sdhci_intel_pch_sdio = {
392 392
393enum { 393enum {
394 INTEL_DSM_FNS = 0, 394 INTEL_DSM_FNS = 0,
395 INTEL_DSM_V18_SWITCH = 3,
395 INTEL_DSM_DRV_STRENGTH = 9, 396 INTEL_DSM_DRV_STRENGTH = 9,
396 INTEL_DSM_D3_RETUNE = 10, 397 INTEL_DSM_D3_RETUNE = 10,
397}; 398};
@@ -557,6 +558,19 @@ static void intel_hs400_enhanced_strobe(struct mmc_host *mmc,
557 sdhci_writel(host, val, INTEL_HS400_ES_REG); 558 sdhci_writel(host, val, INTEL_HS400_ES_REG);
558} 559}
559 560
561static void sdhci_intel_voltage_switch(struct sdhci_host *host)
562{
563 struct sdhci_pci_slot *slot = sdhci_priv(host);
564 struct intel_host *intel_host = sdhci_pci_priv(slot);
565 struct device *dev = &slot->chip->pdev->dev;
566 u32 result = 0;
567 int err;
568
569 err = intel_dsm(intel_host, dev, INTEL_DSM_V18_SWITCH, &result);
570 pr_debug("%s: %s DSM error %d result %u\n",
571 mmc_hostname(host->mmc), __func__, err, result);
572}
573
560static const struct sdhci_ops sdhci_intel_byt_ops = { 574static const struct sdhci_ops sdhci_intel_byt_ops = {
561 .set_clock = sdhci_set_clock, 575 .set_clock = sdhci_set_clock,
562 .set_power = sdhci_intel_set_power, 576 .set_power = sdhci_intel_set_power,
@@ -565,6 +579,7 @@ static const struct sdhci_ops sdhci_intel_byt_ops = {
565 .reset = sdhci_reset, 579 .reset = sdhci_reset,
566 .set_uhs_signaling = sdhci_set_uhs_signaling, 580 .set_uhs_signaling = sdhci_set_uhs_signaling,
567 .hw_reset = sdhci_pci_hw_reset, 581 .hw_reset = sdhci_pci_hw_reset,
582 .voltage_switch = sdhci_intel_voltage_switch,
568}; 583};
569 584
570static void byt_read_dsm(struct sdhci_pci_slot *slot) 585static void byt_read_dsm(struct sdhci_pci_slot *slot)
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index 12cf8288d663..a7293e186e03 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -129,50 +129,6 @@ static int tmio_mmc_next_sg(struct tmio_mmc_host *host)
129 129
130#define CMDREQ_TIMEOUT 5000 130#define CMDREQ_TIMEOUT 5000
131 131
132#ifdef CONFIG_MMC_DEBUG
133
134#define STATUS_TO_TEXT(a, status, i) \
135 do { \
136 if ((status) & TMIO_STAT_##a) { \
137 if ((i)++) \
138 printk(KERN_DEBUG " | "); \
139 printk(KERN_DEBUG #a); \
140 } \
141 } while (0)
142
143static void pr_debug_status(u32 status)
144{
145 int i = 0;
146
147 pr_debug("status: %08x = ", status);
148 STATUS_TO_TEXT(CARD_REMOVE, status, i);
149 STATUS_TO_TEXT(CARD_INSERT, status, i);
150 STATUS_TO_TEXT(SIGSTATE, status, i);
151 STATUS_TO_TEXT(WRPROTECT, status, i);
152 STATUS_TO_TEXT(CARD_REMOVE_A, status, i);
153 STATUS_TO_TEXT(CARD_INSERT_A, status, i);
154 STATUS_TO_TEXT(SIGSTATE_A, status, i);
155 STATUS_TO_TEXT(CMD_IDX_ERR, status, i);
156 STATUS_TO_TEXT(STOPBIT_ERR, status, i);
157 STATUS_TO_TEXT(ILL_FUNC, status, i);
158 STATUS_TO_TEXT(CMD_BUSY, status, i);
159 STATUS_TO_TEXT(CMDRESPEND, status, i);
160 STATUS_TO_TEXT(DATAEND, status, i);
161 STATUS_TO_TEXT(CRCFAIL, status, i);
162 STATUS_TO_TEXT(DATATIMEOUT, status, i);
163 STATUS_TO_TEXT(CMDTIMEOUT, status, i);
164 STATUS_TO_TEXT(RXOVERFLOW, status, i);
165 STATUS_TO_TEXT(TXUNDERRUN, status, i);
166 STATUS_TO_TEXT(RXRDY, status, i);
167 STATUS_TO_TEXT(TXRQ, status, i);
168 STATUS_TO_TEXT(ILL_ACCESS, status, i);
169 printk("\n");
170}
171
172#else
173#define pr_debug_status(s) do { } while (0)
174#endif
175
176static void tmio_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable) 132static void tmio_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
177{ 133{
178 struct tmio_mmc_host *host = mmc_priv(mmc); 134 struct tmio_mmc_host *host = mmc_priv(mmc);
@@ -762,9 +718,6 @@ irqreturn_t tmio_mmc_irq(int irq, void *devid)
762 status = sd_ctrl_read16_and_16_as_32(host, CTL_STATUS); 718 status = sd_ctrl_read16_and_16_as_32(host, CTL_STATUS);
763 ireg = status & TMIO_MASK_IRQ & ~host->sdcard_irq_mask; 719 ireg = status & TMIO_MASK_IRQ & ~host->sdcard_irq_mask;
764 720
765 pr_debug_status(status);
766 pr_debug_status(ireg);
767
768 /* Clear the status except the interrupt status */ 721 /* Clear the status except the interrupt status */
769 sd_ctrl_write32_as_16_and_16(host, CTL_STATUS, TMIO_MASK_IRQ); 722 sd_ctrl_write32_as_16_and_16(host, CTL_STATUS, TMIO_MASK_IRQ);
770 723
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 5736b0c90b33..a308e707392d 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -581,6 +581,14 @@ static struct mtd_part *allocate_partition(struct mtd_info *parent,
581 slave->mtd.erasesize = parent->erasesize; 581 slave->mtd.erasesize = parent->erasesize;
582 } 582 }
583 583
584 /*
585 * Slave erasesize might differ from the master one if the master
586 * exposes several regions with different erasesize. Adjust
587 * wr_alignment accordingly.
588 */
589 if (!(slave->mtd.flags & MTD_NO_ERASE))
590 wr_alignment = slave->mtd.erasesize;
591
584 tmp = slave->offset; 592 tmp = slave->offset;
585 remainder = do_div(tmp, wr_alignment); 593 remainder = do_div(tmp, wr_alignment);
586 if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) { 594 if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) {
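A sketch of the alignment check the mtdpart.c hunk above tightens: for partitions whose master is erasable (MTD_NO_ERASE not set), offsets and sizes are validated against the partition's erase block size rather than a smaller generic write alignment. The numbers below are made up for illustration.

/* Check that a partition offset falls on an erase block boundary. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t offset    = 3 * 1024 * 1024 + 4096;    /* partition start */
        uint32_t erasesize = 128 * 1024;                /* wr_alignment here */
        uint64_t remainder = offset % erasesize;

        if (remainder)
                printf("partition not erase-block aligned (off by %llu bytes)\n",
                       (unsigned long long)remainder);
        return 0;
}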
diff --git a/drivers/mtd/nand/atmel/pmecc.c b/drivers/mtd/nand/atmel/pmecc.c
index 146af8218314..8268636675ef 100644
--- a/drivers/mtd/nand/atmel/pmecc.c
+++ b/drivers/mtd/nand/atmel/pmecc.c
@@ -363,7 +363,7 @@ atmel_pmecc_create_user(struct atmel_pmecc *pmecc,
363 size += (req->ecc.strength + 1) * sizeof(u16); 363 size += (req->ecc.strength + 1) * sizeof(u16);
364 /* Reserve space for mu, dmu and delta. */ 364 /* Reserve space for mu, dmu and delta. */
365 size = ALIGN(size, sizeof(s32)); 365 size = ALIGN(size, sizeof(s32));
366 size += (req->ecc.strength + 1) * sizeof(s32); 366 size += (req->ecc.strength + 1) * sizeof(s32) * 3;
367 367
368 user = kzalloc(size, GFP_KERNEL); 368 user = kzalloc(size, GFP_KERNEL);
369 if (!user) 369 if (!user)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index acc816b67582..bb2aad078637 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -134,8 +134,6 @@ static inline bool nvme_req_needs_retry(struct request *req)
134 return false; 134 return false;
135 if (nvme_req(req)->status & NVME_SC_DNR) 135 if (nvme_req(req)->status & NVME_SC_DNR)
136 return false; 136 return false;
137 if (jiffies - req->start_time >= req->timeout)
138 return false;
139 if (nvme_req(req)->retries >= nvme_max_retries) 137 if (nvme_req(req)->retries >= nvme_max_retries)
140 return false; 138 return false;
141 return true; 139 return true;
@@ -2590,7 +2588,7 @@ static void nvme_async_event_work(struct work_struct *work)
2590 container_of(work, struct nvme_ctrl, async_event_work); 2588 container_of(work, struct nvme_ctrl, async_event_work);
2591 2589
2592 spin_lock_irq(&ctrl->lock); 2590 spin_lock_irq(&ctrl->lock);
2593 while (ctrl->event_limit > 0) { 2591 while (ctrl->state == NVME_CTRL_LIVE && ctrl->event_limit > 0) {
2594 int aer_idx = --ctrl->event_limit; 2592 int aer_idx = --ctrl->event_limit;
2595 2593
2596 spin_unlock_irq(&ctrl->lock); 2594 spin_unlock_irq(&ctrl->lock);
@@ -2677,7 +2675,8 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
2677 /*FALLTHRU*/ 2675 /*FALLTHRU*/
2678 case NVME_SC_ABORT_REQ: 2676 case NVME_SC_ABORT_REQ:
2679 ++ctrl->event_limit; 2677 ++ctrl->event_limit;
2680 queue_work(nvme_wq, &ctrl->async_event_work); 2678 if (ctrl->state == NVME_CTRL_LIVE)
2679 queue_work(nvme_wq, &ctrl->async_event_work);
2681 break; 2680 break;
2682 default: 2681 default:
2683 break; 2682 break;
@@ -2692,7 +2691,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
2692 nvme_queue_scan(ctrl); 2691 nvme_queue_scan(ctrl);
2693 break; 2692 break;
2694 case NVME_AER_NOTICE_FW_ACT_STARTING: 2693 case NVME_AER_NOTICE_FW_ACT_STARTING:
2695 schedule_work(&ctrl->fw_act_work); 2694 queue_work(nvme_wq, &ctrl->fw_act_work);
2696 break; 2695 break;
2697 default: 2696 default:
2698 dev_warn(ctrl->device, "async event result %08x\n", result); 2697 dev_warn(ctrl->device, "async event result %08x\n", result);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 47307752dc65..555c976cc2ee 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -565,6 +565,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
565 opts->queue_size = NVMF_DEF_QUEUE_SIZE; 565 opts->queue_size = NVMF_DEF_QUEUE_SIZE;
566 opts->nr_io_queues = num_online_cpus(); 566 opts->nr_io_queues = num_online_cpus();
567 opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY; 567 opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
568 opts->kato = NVME_DEFAULT_KATO;
568 569
569 options = o = kstrdup(buf, GFP_KERNEL); 570 options = o = kstrdup(buf, GFP_KERNEL);
570 if (!options) 571 if (!options)
@@ -655,21 +656,22 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
655 goto out; 656 goto out;
656 } 657 }
657 658
658 if (opts->discovery_nqn) {
659 pr_err("Discovery controllers cannot accept keep_alive_tmo != 0\n");
660 ret = -EINVAL;
661 goto out;
662 }
663
664 if (token < 0) { 659 if (token < 0) {
665 pr_err("Invalid keep_alive_tmo %d\n", token); 660 pr_err("Invalid keep_alive_tmo %d\n", token);
666 ret = -EINVAL; 661 ret = -EINVAL;
667 goto out; 662 goto out;
668 } else if (token == 0) { 663 } else if (token == 0 && !opts->discovery_nqn) {
669 /* Allowed for debug */ 664 /* Allowed for debug */
670 pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n"); 665 pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n");
671 } 666 }
672 opts->kato = token; 667 opts->kato = token;
668
669 if (opts->discovery_nqn && opts->kato) {
670 pr_err("Discovery controllers cannot accept KATO != 0\n");
671 ret = -EINVAL;
672 goto out;
673 }
674
673 break; 675 break;
674 case NVMF_OPT_CTRL_LOSS_TMO: 676 case NVMF_OPT_CTRL_LOSS_TMO:
675 if (match_int(args, &token)) { 677 if (match_int(args, &token)) {
@@ -762,8 +764,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
762 uuid_copy(&opts->host->id, &hostid); 764 uuid_copy(&opts->host->id, &hostid);
763 765
764out: 766out:
765 if (!opts->discovery_nqn && !opts->kato)
766 opts->kato = NVME_DEFAULT_KATO;
767 kfree(options); 767 kfree(options);
768 return ret; 768 return ret;
769} 769}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index d2e882c0f496..af075e998944 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1376,7 +1376,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
1376 if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) 1376 if (atomic_read(&op->state) == FCPOP_STATE_ABORTED)
1377 status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); 1377 status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
1378 else if (freq->status) 1378 else if (freq->status)
1379 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1379 status = cpu_to_le16(NVME_SC_INTERNAL << 1);
1380 1380
1381 /* 1381 /*
1382 * For the linux implementation, if we have an unsuccesful 1382 * For the linux implementation, if we have an unsuccesful
@@ -1404,7 +1404,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
1404 */ 1404 */
1405 if (freq->transferred_length != 1405 if (freq->transferred_length !=
1406 be32_to_cpu(op->cmd_iu.data_len)) { 1406 be32_to_cpu(op->cmd_iu.data_len)) {
1407 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1407 status = cpu_to_le16(NVME_SC_INTERNAL << 1);
1408 goto done; 1408 goto done;
1409 } 1409 }
1410 result.u64 = 0; 1410 result.u64 = 0;
@@ -1421,7 +1421,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
1421 freq->transferred_length || 1421 freq->transferred_length ||
1422 op->rsp_iu.status_code || 1422 op->rsp_iu.status_code ||
1423 sqe->common.command_id != cqe->command_id)) { 1423 sqe->common.command_id != cqe->command_id)) {
1424 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1424 status = cpu_to_le16(NVME_SC_INTERNAL << 1);
1425 goto done; 1425 goto done;
1426 } 1426 }
1427 result = cqe->result; 1427 result = cqe->result;
@@ -1429,7 +1429,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
1429 break; 1429 break;
1430 1430
1431 default: 1431 default:
1432 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1432 status = cpu_to_le16(NVME_SC_INTERNAL << 1);
1433 goto done; 1433 goto done;
1434 } 1434 }
1435 1435
@@ -1989,16 +1989,17 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
1989 * as well as those by FC-NVME spec. 1989 * as well as those by FC-NVME spec.
1990 */ 1990 */
1991 WARN_ON_ONCE(sqe->common.metadata); 1991 WARN_ON_ONCE(sqe->common.metadata);
1992 WARN_ON_ONCE(sqe->common.dptr.prp1);
1993 WARN_ON_ONCE(sqe->common.dptr.prp2);
1994 sqe->common.flags |= NVME_CMD_SGL_METABUF; 1992 sqe->common.flags |= NVME_CMD_SGL_METABUF;
1995 1993
1996 /* 1994 /*
1997 * format SQE DPTR field per FC-NVME rules 1995 * format SQE DPTR field per FC-NVME rules:
1998 * type=data block descr; subtype=offset; 1996 * type=0x5 Transport SGL Data Block Descriptor
1999 * offset is currently 0. 1997 * subtype=0xA Transport-specific value
1998 * address=0
1999 * length=length of the data series
2000 */ 2000 */
2001 sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET; 2001 sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
2002 NVME_SGL_FMT_TRANSPORT_A;
2002 sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); 2003 sqe->rw.dptr.sgl.length = cpu_to_le32(data_len);
2003 sqe->rw.dptr.sgl.addr = 0; 2004 sqe->rw.dptr.sgl.addr = 0;
2004 2005
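An illustration of the SGL identifier byte built in the nvme_fc_start_fcp_op() hunk above: the descriptor type sits in the high nibble and the subtype in the low nibble, so a Transport SGL Data Block (0x5) with the transport-specific subtype (0xA) yields 0x5A. The constant names below are local stand-ins, not the kernel's.

/* Pack an SGL descriptor type and subtype into one identifier byte. */
#include <stdio.h>
#include <stdint.h>

#define SGL_DESC_TRANSPORT_DATA_BLOCK 0x5
#define SGL_SUBTYPE_TRANSPORT_A       0xA

int main(void)
{
        uint8_t sgl_id = (SGL_DESC_TRANSPORT_DATA_BLOCK << 4) |
                         SGL_SUBTYPE_TRANSPORT_A;

        printf("SGL identifier byte: 0x%02X\n", (unsigned int)sgl_id);  /* 0x5A */
        return 0;
}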
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4a2121335f48..cb73bc8cad3b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -24,6 +24,7 @@
24#include <linux/mm.h> 24#include <linux/mm.h>
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/mutex.h> 26#include <linux/mutex.h>
27#include <linux/once.h>
27#include <linux/pci.h> 28#include <linux/pci.h>
28#include <linux/poison.h> 29#include <linux/poison.h>
29#include <linux/t10-pi.h> 30#include <linux/t10-pi.h>
@@ -540,6 +541,20 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
540} 541}
541#endif 542#endif
542 543
544static void nvme_print_sgl(struct scatterlist *sgl, int nents)
545{
546 int i;
547 struct scatterlist *sg;
548
549 for_each_sg(sgl, sg, nents, i) {
550 dma_addr_t phys = sg_phys(sg);
551 pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
552 "dma_address:%pad dma_length:%d\n",
553 i, &phys, sg->offset, sg->length, &sg_dma_address(sg),
554 sg_dma_len(sg));
555 }
556}
557
543static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req) 558static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
544{ 559{
545 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 560 struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -622,19 +637,10 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
622 return BLK_STS_OK; 637 return BLK_STS_OK;
623 638
624 bad_sgl: 639 bad_sgl:
625 if (WARN_ONCE(1, "Invalid SGL for payload:%d nents:%d\n", 640 WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents),
626 blk_rq_payload_bytes(req), iod->nents)) { 641 "Invalid SGL for payload:%d nents:%d\n",
627 for_each_sg(iod->sg, sg, iod->nents, i) { 642 blk_rq_payload_bytes(req), iod->nents);
628 dma_addr_t phys = sg_phys(sg);
629 pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
630 "dma_address:%pad dma_length:%d\n", i, &phys,
631 sg->offset, sg->length,
632 &sg_dma_address(sg),
633 sg_dma_len(sg));
634 }
635 }
636 return BLK_STS_IOERR; 643 return BLK_STS_IOERR;
637
638} 644}
639 645
640static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, 646static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
@@ -1313,11 +1319,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
1313 if (result < 0) 1319 if (result < 0)
1314 goto release_cq; 1320 goto release_cq;
1315 1321
1322 nvme_init_queue(nvmeq, qid);
1316 result = queue_request_irq(nvmeq); 1323 result = queue_request_irq(nvmeq);
1317 if (result < 0) 1324 if (result < 0)
1318 goto release_sq; 1325 goto release_sq;
1319 1326
1320 nvme_init_queue(nvmeq, qid);
1321 return result; 1327 return result;
1322 1328
1323 release_sq: 1329 release_sq:
@@ -1464,6 +1470,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
1464 return result; 1470 return result;
1465 1471
1466 nvmeq->cq_vector = 0; 1472 nvmeq->cq_vector = 0;
1473 nvme_init_queue(nvmeq, 0);
1467 result = queue_request_irq(nvmeq); 1474 result = queue_request_irq(nvmeq);
1468 if (result) { 1475 if (result) {
1469 nvmeq->cq_vector = -1; 1476 nvmeq->cq_vector = -1;
@@ -2156,7 +2163,6 @@ static void nvme_reset_work(struct work_struct *work)
2156 if (result) 2163 if (result)
2157 goto out; 2164 goto out;
2158 2165
2159 nvme_init_queue(dev->queues[0], 0);
2160 result = nvme_alloc_admin_tags(dev); 2166 result = nvme_alloc_admin_tags(dev);
2161 if (result) 2167 if (result)
2162 goto out; 2168 goto out;
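A userspace analogue of the WARN(DO_ONCE(...)) pattern introduced in nvme_setup_prps() above: the expensive scatterlist dump and the warning fire only on the first bad payload. This simplified version is not thread-safe, unlike the kernel's DO_ONCE().

/* Run a diagnostic callback at most once and report whether it ran. */
#include <stdbool.h>
#include <stdio.h>

static bool do_once(void (*fn)(void))
{
        static bool done;

        if (done)
                return false;
        done = true;
        fn();
        return true;            /* true only on the first invocation */
}

static void print_sgl(void)
{
        puts("  ...dumping scatterlist entries...");
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                if (do_once(print_sgl))
                        puts("WARN: Invalid SGL for payload");  /* fires once */
        }
        return 0;
}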
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 58983000964b..92a03ff5fb4d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -942,7 +942,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
942 } 942 }
943 943
944 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 944 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
945 WARN_ON_ONCE(!changed); 945 if (!changed) {
946 /* state change failure is ok if we're in DELETING state */
947 WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
948 return;
949 }
950
946 ctrl->ctrl.nr_reconnects = 0; 951 ctrl->ctrl.nr_reconnects = 0;
947 952
948 nvme_start_ctrl(&ctrl->ctrl); 953 nvme_start_ctrl(&ctrl->ctrl);
@@ -962,7 +967,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
962 struct nvme_rdma_ctrl *ctrl = container_of(work, 967 struct nvme_rdma_ctrl *ctrl = container_of(work,
963 struct nvme_rdma_ctrl, err_work); 968 struct nvme_rdma_ctrl, err_work);
964 969
965 nvme_stop_ctrl(&ctrl->ctrl); 970 nvme_stop_keep_alive(&ctrl->ctrl);
966 971
967 if (ctrl->ctrl.queue_count > 1) { 972 if (ctrl->ctrl.queue_count > 1) {
968 nvme_stop_queues(&ctrl->ctrl); 973 nvme_stop_queues(&ctrl->ctrl);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 7c23eaf8e563..1b208beeef50 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -390,10 +390,10 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
390 if (status) 390 if (status)
391 nvmet_set_status(req, status); 391 nvmet_set_status(req, status);
392 392
393 /* XXX: need to fill in something useful for sq_head */ 393 if (req->sq->size)
394 req->rsp->sq_head = 0; 394 req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size;
395 if (likely(req->sq)) /* may happen during early failure */ 395 req->rsp->sq_head = cpu_to_le16(req->sq->sqhd);
396 req->rsp->sq_id = cpu_to_le16(req->sq->qid); 396 req->rsp->sq_id = cpu_to_le16(req->sq->qid);
397 req->rsp->command_id = req->cmd->common.command_id; 397 req->rsp->command_id = req->cmd->common.command_id;
398 398
399 if (req->ns) 399 if (req->ns)
@@ -420,6 +420,7 @@ void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
420void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, 420void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
421 u16 qid, u16 size) 421 u16 qid, u16 size)
422{ 422{
423 sq->sqhd = 0;
423 sq->qid = qid; 424 sq->qid = qid;
424 sq->size = size; 425 sq->size = size;
425 426
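A minimal sketch of the sq_head accounting added in __nvmet_req_complete() above: the target now advances a per-queue head counter modulo the queue size and reports it in every completion, instead of always returning 0.

/* Wrapping submission-queue head counter reported in completions. */
#include <stdio.h>
#include <stdint.h>

struct sq { uint16_t sqhd; uint16_t size; };

static uint16_t complete_one(struct sq *sq)
{
        if (sq->size)
                sq->sqhd = (uint16_t)((sq->sqhd + 1) % sq->size);
        return sq->sqhd;                /* value placed in the CQE sq_head */
}

int main(void)
{
        struct sq sq = { .sqhd = 0, .size = 4 };

        for (int i = 0; i < 6; i++)
                printf("sq_head=%u\n", complete_one(&sq));      /* 1 2 3 0 1 2 */
        return 0;
}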
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 859a66725291..db3bf6b8bf9e 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -109,9 +109,14 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
109 pr_warn("queue already connected!\n"); 109 pr_warn("queue already connected!\n");
110 return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR; 110 return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
111 } 111 }
112 if (!sqsize) {
113 pr_warn("queue size zero!\n");
114 return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
115 }
112 116
113 nvmet_cq_setup(ctrl, req->cq, qid, sqsize); 117 /* note: convert queue size from 0's-based value to 1's-based value */
114 nvmet_sq_setup(ctrl, req->sq, qid, sqsize); 118 nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1);
119 nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1);
115 return 0; 120 return 0;
116} 121}
117 122
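A sketch of the 0's-based conversion noted in the nvmet_install_queue() hunk above: the sqsize field in a Fabrics connect command encodes N entries as N-1, so the driver rejects a raw value of 0 and uses sqsize + 1 internally. Function names below are illustrative.

/* Convert a 0's-based queue size field into a usable entry count. */
#include <stdio.h>
#include <stdint.h>

static int install_queue(uint16_t sqsize_0based, uint16_t *entries)
{
        if (!sqsize_0based)
                return -1;                      /* queue size zero: reject */
        *entries = sqsize_0based + 1;           /* 1's-based entry count */
        return 0;
}

int main(void)
{
        uint16_t entries;

        if (!install_queue(31, &entries))
                printf("queue holds %u entries\n", entries);    /* 32 */
        return 0;
}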
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 421e43bf1dd7..58e010bdda3e 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -148,7 +148,7 @@ struct nvmet_fc_tgt_assoc {
148 u32 a_id; 148 u32 a_id;
149 struct nvmet_fc_tgtport *tgtport; 149 struct nvmet_fc_tgtport *tgtport;
150 struct list_head a_list; 150 struct list_head a_list;
151 struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES]; 151 struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1];
152 struct kref ref; 152 struct kref ref;
153}; 153};
154 154
@@ -608,7 +608,7 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
608 unsigned long flags; 608 unsigned long flags;
609 int ret; 609 int ret;
610 610
611 if (qid >= NVMET_NR_QUEUES) 611 if (qid > NVMET_NR_QUEUES)
612 return NULL; 612 return NULL;
613 613
614 queue = kzalloc((sizeof(*queue) + 614 queue = kzalloc((sizeof(*queue) +
@@ -783,6 +783,9 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport,
783 u16 qid = nvmet_fc_getqueueid(connection_id); 783 u16 qid = nvmet_fc_getqueueid(connection_id);
784 unsigned long flags; 784 unsigned long flags;
785 785
786 if (qid > NVMET_NR_QUEUES)
787 return NULL;
788
786 spin_lock_irqsave(&tgtport->lock, flags); 789 spin_lock_irqsave(&tgtport->lock, flags);
787 list_for_each_entry(assoc, &tgtport->assoc_list, a_list) { 790 list_for_each_entry(assoc, &tgtport->assoc_list, a_list) {
788 if (association_id == assoc->association_id) { 791 if (association_id == assoc->association_id) {
@@ -888,7 +891,7 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
888 int i; 891 int i;
889 892
890 spin_lock_irqsave(&tgtport->lock, flags); 893 spin_lock_irqsave(&tgtport->lock, flags);
891 for (i = NVMET_NR_QUEUES - 1; i >= 0; i--) { 894 for (i = NVMET_NR_QUEUES; i >= 0; i--) {
892 queue = assoc->queues[i]; 895 queue = assoc->queues[i];
893 if (queue) { 896 if (queue) {
894 if (!nvmet_fc_tgt_q_get(queue)) 897 if (!nvmet_fc_tgt_q_get(queue))
@@ -1910,8 +1913,7 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
1910 spin_lock_irqsave(&fod->flock, flags); 1913 spin_lock_irqsave(&fod->flock, flags);
1911 fod->writedataactive = false; 1914 fod->writedataactive = false;
1912 spin_unlock_irqrestore(&fod->flock, flags); 1915 spin_unlock_irqrestore(&fod->flock, flags);
1913 nvmet_req_complete(&fod->req, 1916 nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
1914 NVME_SC_FC_TRANSPORT_ERROR);
1915 } else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ { 1917 } else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ {
1916 fcpreq->fcp_error = ret; 1918 fcpreq->fcp_error = ret;
1917 fcpreq->transferred_length = 0; 1919 fcpreq->transferred_length = 0;
@@ -1929,8 +1931,7 @@ __nvmet_fc_fod_op_abort(struct nvmet_fc_fcp_iod *fod, bool abort)
1929 /* if in the middle of an io and we need to tear down */ 1931 /* if in the middle of an io and we need to tear down */
1930 if (abort) { 1932 if (abort) {
1931 if (fcpreq->op == NVMET_FCOP_WRITEDATA) { 1933 if (fcpreq->op == NVMET_FCOP_WRITEDATA) {
1932 nvmet_req_complete(&fod->req, 1934 nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
1933 NVME_SC_FC_TRANSPORT_ERROR);
1934 return true; 1935 return true;
1935 } 1936 }
1936 1937
@@ -1968,8 +1969,7 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod)
1968 fod->abort = true; 1969 fod->abort = true;
1969 spin_unlock(&fod->flock); 1970 spin_unlock(&fod->flock);
1970 1971
1971 nvmet_req_complete(&fod->req, 1972 nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
1972 NVME_SC_FC_TRANSPORT_ERROR);
1973 return; 1973 return;
1974 } 1974 }
1975 1975
@@ -2533,13 +2533,17 @@ nvmet_fc_remove_port(struct nvmet_port *port)
2533{ 2533{
2534 struct nvmet_fc_tgtport *tgtport = port->priv; 2534 struct nvmet_fc_tgtport *tgtport = port->priv;
2535 unsigned long flags; 2535 unsigned long flags;
2536 bool matched = false;
2536 2537
2537 spin_lock_irqsave(&nvmet_fc_tgtlock, flags); 2538 spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
2538 if (tgtport->port == port) { 2539 if (tgtport->port == port) {
2539 nvmet_fc_tgtport_put(tgtport); 2540 matched = true;
2540 tgtport->port = NULL; 2541 tgtport->port = NULL;
2541 } 2542 }
2542 spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); 2543 spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
2544
2545 if (matched)
2546 nvmet_fc_tgtport_put(tgtport);
2543} 2547}
2544 2548
2545static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { 2549static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
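A sketch of the off-by-one the nvmet fc.c hunks above fix: queue IDs run from 0 (admin) through NVMET_NR_QUEUES (last I/O queue), so the per-association lookup array needs NVMET_NR_QUEUES + 1 slots and the bound check must be qid > NVMET_NR_QUEUES, not >=. The NR_QUEUES value below is arbitrary.

/* Size the qid-indexed array for admin + I/O queues and bound-check it. */
#include <stdio.h>

#define NR_QUEUES 4                             /* I/O queues; qid 0 is admin */

static void *queues[NR_QUEUES + 1];             /* one slot per qid */

int main(void)
{
        unsigned int qid;

        for (qid = 0; qid <= NR_QUEUES + 1; qid++) {
                if (qid > NR_QUEUES) {          /* the corrected bound check */
                        printf("qid %u: out of range\n", qid);
                        continue;
                }
                printf("qid %u: slot %p\n", qid, (void *)&queues[qid]);
        }
        return 0;
}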
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 1cb9847ec261..7b75d9de55ab 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -224,8 +224,6 @@ struct fcloop_nport {
224 struct fcloop_lport *lport; 224 struct fcloop_lport *lport;
225 struct list_head nport_list; 225 struct list_head nport_list;
226 struct kref ref; 226 struct kref ref;
227 struct completion rport_unreg_done;
228 struct completion tport_unreg_done;
229 u64 node_name; 227 u64 node_name;
230 u64 port_name; 228 u64 port_name;
231 u32 port_role; 229 u32 port_role;
@@ -576,7 +574,7 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
576 tfcp_req->aborted = true; 574 tfcp_req->aborted = true;
577 spin_unlock(&tfcp_req->reqlock); 575 spin_unlock(&tfcp_req->reqlock);
578 576
579 tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED;
577 tfcp_req->status = NVME_SC_INTERNAL;
580 578
581 /* 579 /*
582 * nothing more to do. If io wasn't active, the transport should 580 * nothing more to do. If io wasn't active, the transport should
@@ -631,6 +629,32 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
631} 629}
632 630
633static void 631static void
632fcloop_nport_free(struct kref *ref)
633{
634 struct fcloop_nport *nport =
635 container_of(ref, struct fcloop_nport, ref);
636 unsigned long flags;
637
638 spin_lock_irqsave(&fcloop_lock, flags);
639 list_del(&nport->nport_list);
640 spin_unlock_irqrestore(&fcloop_lock, flags);
641
642 kfree(nport);
643}
644
645static void
646fcloop_nport_put(struct fcloop_nport *nport)
647{
648 kref_put(&nport->ref, fcloop_nport_free);
649}
650
651static int
652fcloop_nport_get(struct fcloop_nport *nport)
653{
654 return kref_get_unless_zero(&nport->ref);
655}
656
657static void
634fcloop_localport_delete(struct nvme_fc_local_port *localport) 658fcloop_localport_delete(struct nvme_fc_local_port *localport)
635{ 659{
636 struct fcloop_lport *lport = localport->private; 660 struct fcloop_lport *lport = localport->private;
@@ -644,8 +668,7 @@ fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport)
644{ 668{
645 struct fcloop_rport *rport = remoteport->private; 669 struct fcloop_rport *rport = remoteport->private;
646 670
647 /* release any threads waiting for the unreg to complete */
648 complete(&rport->nport->rport_unreg_done);
671 fcloop_nport_put(rport->nport);
649} 672}
650 673
651static void 674static void
@@ -653,8 +676,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
653{ 676{
654 struct fcloop_tport *tport = targetport->private; 677 struct fcloop_tport *tport = targetport->private;
655 678
656 /* release any threads waiting for the unreg to complete */
657 complete(&tport->nport->tport_unreg_done);
679 fcloop_nport_put(tport->nport);
658} 680}
659 681
660#define FCLOOP_HW_QUEUES 4 682#define FCLOOP_HW_QUEUES 4
@@ -722,6 +744,7 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr,
722 goto out_free_opts; 744 goto out_free_opts;
723 } 745 }
724 746
747 memset(&pinfo, 0, sizeof(pinfo));
725 pinfo.node_name = opts->wwnn; 748 pinfo.node_name = opts->wwnn;
726 pinfo.port_name = opts->wwpn; 749 pinfo.port_name = opts->wwpn;
727 pinfo.port_role = opts->roles; 750 pinfo.port_role = opts->roles;
@@ -804,32 +827,6 @@ fcloop_delete_local_port(struct device *dev, struct device_attribute *attr,
804 return ret ? ret : count; 827 return ret ? ret : count;
805} 828}
806 829
807static void
808fcloop_nport_free(struct kref *ref)
809{
810 struct fcloop_nport *nport =
811 container_of(ref, struct fcloop_nport, ref);
812 unsigned long flags;
813
814 spin_lock_irqsave(&fcloop_lock, flags);
815 list_del(&nport->nport_list);
816 spin_unlock_irqrestore(&fcloop_lock, flags);
817
818 kfree(nport);
819}
820
821static void
822fcloop_nport_put(struct fcloop_nport *nport)
823{
824 kref_put(&nport->ref, fcloop_nport_free);
825}
826
827static int
828fcloop_nport_get(struct fcloop_nport *nport)
829{
830 return kref_get_unless_zero(&nport->ref);
831}
832
833static struct fcloop_nport * 830static struct fcloop_nport *
834fcloop_alloc_nport(const char *buf, size_t count, bool remoteport) 831fcloop_alloc_nport(const char *buf, size_t count, bool remoteport)
835{ 832{
@@ -938,6 +935,7 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr,
938 if (!nport) 935 if (!nport)
939 return -EIO; 936 return -EIO;
940 937
938 memset(&pinfo, 0, sizeof(pinfo));
941 pinfo.node_name = nport->node_name; 939 pinfo.node_name = nport->node_name;
942 pinfo.port_name = nport->port_name; 940 pinfo.port_name = nport->port_name;
943 pinfo.port_role = nport->port_role; 941 pinfo.port_role = nport->port_role;
@@ -979,24 +977,12 @@ __unlink_remote_port(struct fcloop_nport *nport)
979} 977}
980 978
981static int 979static int
982__wait_remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport)
980__remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport)
983{ 981{
984 int ret;
985
986 if (!rport) 982 if (!rport)
987 return -EALREADY; 983 return -EALREADY;
988 984
989 init_completion(&nport->rport_unreg_done);
985 return nvme_fc_unregister_remoteport(rport->remoteport);
990
991 ret = nvme_fc_unregister_remoteport(rport->remoteport);
992 if (ret)
993 return ret;
994
995 wait_for_completion(&nport->rport_unreg_done);
996
997 fcloop_nport_put(nport);
998
999 return ret;
1000} 986}
1001 987
1002static ssize_t 988static ssize_t
@@ -1029,7 +1015,7 @@ fcloop_delete_remote_port(struct device *dev, struct device_attribute *attr,
1029 if (!nport) 1015 if (!nport)
1030 return -ENOENT; 1016 return -ENOENT;
1031 1017
1032 ret = __wait_remoteport_unreg(nport, rport);
1018 ret = __remoteport_unreg(nport, rport);
1033 1019
1034 return ret ? ret : count; 1020 return ret ? ret : count;
1035} 1021}
@@ -1086,24 +1072,12 @@ __unlink_target_port(struct fcloop_nport *nport)
1086} 1072}
1087 1073
1088static int 1074static int
1089__wait_targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
1075__targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
1090{ 1076{
1091 int ret;
1092
1093 if (!tport) 1077 if (!tport)
1094 return -EALREADY; 1078 return -EALREADY;
1095 1079
1096 init_completion(&nport->tport_unreg_done);
1080 return nvmet_fc_unregister_targetport(tport->targetport);
1097
1098 ret = nvmet_fc_unregister_targetport(tport->targetport);
1099 if (ret)
1100 return ret;
1101
1102 wait_for_completion(&nport->tport_unreg_done);
1103
1104 fcloop_nport_put(nport);
1105
1106 return ret;
1107} 1081}
1108 1082
1109static ssize_t 1083static ssize_t
@@ -1136,7 +1110,7 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr,
1136 if (!nport) 1110 if (!nport)
1137 return -ENOENT; 1111 return -ENOENT;
1138 1112
1139 ret = __wait_targetport_unreg(nport, tport);
1113 ret = __targetport_unreg(nport, tport);
1140 1114
1141 return ret ? ret : count; 1115 return ret ? ret : count;
1142} 1116}
@@ -1223,11 +1197,11 @@ static void __exit fcloop_exit(void)
1223 1197
1224 spin_unlock_irqrestore(&fcloop_lock, flags); 1198 spin_unlock_irqrestore(&fcloop_lock, flags);
1225 1199
1226 ret = __wait_targetport_unreg(nport, tport);
1200 ret = __targetport_unreg(nport, tport);
1227 if (ret) 1201 if (ret)
1228 pr_warn("%s: Failed deleting target port\n", __func__); 1202 pr_warn("%s: Failed deleting target port\n", __func__);
1229 1203
1230 ret = __wait_remoteport_unreg(nport, rport);
1204 ret = __remoteport_unreg(nport, rport);
1231 if (ret) 1205 if (ret)
1232 pr_warn("%s: Failed deleting remote port\n", __func__); 1206 pr_warn("%s: Failed deleting remote port\n", __func__);
1233 1207
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 7d261ab894f4..7b8e20adf760 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -74,6 +74,7 @@ struct nvmet_sq {
74 struct percpu_ref ref; 74 struct percpu_ref ref;
75 u16 qid; 75 u16 qid;
76 u16 size; 76 u16 size;
77 u16 sqhd;
77 struct completion free_done; 78 struct completion free_done;
78 struct completion confirm_done; 79 struct completion confirm_done;
79}; 80};
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 1eecfa301f7f..8e075ea2743e 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -686,7 +686,7 @@ static ssize_t driver_override_store(struct device *dev,
686 const char *buf, size_t count) 686 const char *buf, size_t count)
687{ 687{
688 struct pci_dev *pdev = to_pci_dev(dev); 688 struct pci_dev *pdev = to_pci_dev(dev);
689 char *driver_override, *old = pdev->driver_override, *cp;
689 char *driver_override, *old, *cp;
690 690
691 /* We need to keep extra room for a newline */ 691 /* We need to keep extra room for a newline */
692 if (count >= (PAGE_SIZE - 1)) 692 if (count >= (PAGE_SIZE - 1))
@@ -700,12 +700,15 @@ static ssize_t driver_override_store(struct device *dev,
700 if (cp) 700 if (cp)
701 *cp = '\0'; 701 *cp = '\0';
702 702
703 device_lock(dev);
704 old = pdev->driver_override;
703 if (strlen(driver_override)) { 705 if (strlen(driver_override)) {
704 pdev->driver_override = driver_override; 706 pdev->driver_override = driver_override;
705 } else { 707 } else {
706 kfree(driver_override); 708 kfree(driver_override);
707 pdev->driver_override = NULL; 709 pdev->driver_override = NULL;
708 } 710 }
711 device_unlock(dev);
709 712
710 kfree(old); 713 kfree(old);
711 714
@@ -716,8 +719,12 @@ static ssize_t driver_override_show(struct device *dev,
716 struct device_attribute *attr, char *buf) 719 struct device_attribute *attr, char *buf)
717{ 720{
718 struct pci_dev *pdev = to_pci_dev(dev); 721 struct pci_dev *pdev = to_pci_dev(dev);
722 ssize_t len;
719 723
720 return snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override);
724 device_lock(dev);
725 len = snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override);
726 device_unlock(dev);
727 return len;
721} 728}
722static DEVICE_ATTR_RW(driver_override); 729static DEVICE_ATTR_RW(driver_override);
723 730
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 85de30f93a9c..56a8195096a2 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -254,10 +254,12 @@ static int bl_update_status(struct backlight_device *b)
254{ 254{
255 struct acpi_device *device = bl_get_data(b); 255 struct acpi_device *device = bl_get_data(b);
256 256
257 if (b->props.power == FB_BLANK_POWERDOWN)
258 call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3);
259 else
260 call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0);
257 if (fext) {
258 if (b->props.power == FB_BLANK_POWERDOWN)
259 call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3);
260 else
261 call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0);
262 }
261 263
262 return set_lcd_level(device, b->props.brightness); 264 return set_lcd_level(device, b->props.brightness);
263} 265}
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index a64285ab0728..af3e4d3f9735 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -699,13 +699,13 @@ static void _aac_probe_container1(void * context, struct fib * fibptr)
699 int status; 699 int status;
700 700
701 dresp = (struct aac_mount *) fib_data(fibptr); 701 dresp = (struct aac_mount *) fib_data(fibptr);
702 if (!(fibptr->dev->supplement_adapter_info.supported_options2 &
703 AAC_OPTION_VARIABLE_BLOCK_SIZE))
704 dresp->mnt[0].capacityhigh = 0;
705 if ((le32_to_cpu(dresp->status) != ST_OK) ||
706 (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) {
707 _aac_probe_container2(context, fibptr);
708 return;
702 if (!aac_supports_2T(fibptr->dev)) {
703 dresp->mnt[0].capacityhigh = 0;
704 if ((le32_to_cpu(dresp->status) == ST_OK) &&
705 (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) {
706 _aac_probe_container2(context, fibptr);
707 return;
708 }
709 } 709 }
710 scsicmd = (struct scsi_cmnd *) context; 710 scsicmd = (struct scsi_cmnd *) context;
711 711
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 92fabf2b0c24..403a639574e5 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -2701,6 +2701,11 @@ static inline int aac_is_src(struct aac_dev *dev)
2701 return 0; 2701 return 0;
2702} 2702}
2703 2703
2704static inline int aac_supports_2T(struct aac_dev *dev)
2705{
2706 return (dev->adapter_info.options & AAC_OPT_NEW_COMM_64);
2707}
2708
2704char * get_container_type(unsigned type); 2709char * get_container_type(unsigned type);
2705extern int numacb; 2710extern int numacb;
2706extern char aac_driver_version[]; 2711extern char aac_driver_version[];
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 87cc4a93e637..62beb2596466 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -906,12 +906,14 @@ static int aac_eh_dev_reset(struct scsi_cmnd *cmd)
906 906
907 bus = aac_logical_to_phys(scmd_channel(cmd)); 907 bus = aac_logical_to_phys(scmd_channel(cmd));
908 cid = scmd_id(cmd); 908 cid = scmd_id(cmd);
909 info = &aac->hba_map[bus][cid];
910 if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
911 info->devtype != AAC_DEVTYPE_NATIVE_RAW)
912 return FAILED;
913
914 if (info->reset_state > 0)
915 return FAILED;
909
910 if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS)
911 return FAILED;
912
913 info = &aac->hba_map[bus][cid];
914
915 if (info->devtype != AAC_DEVTYPE_NATIVE_RAW &&
916 info->reset_state > 0)
917 return FAILED;
916 918
917 pr_err("%s: Host adapter reset request. SCSI hang ?\n", 919 pr_err("%s: Host adapter reset request. SCSI hang ?\n",
@@ -962,12 +964,14 @@ static int aac_eh_target_reset(struct scsi_cmnd *cmd)
962 964
963 bus = aac_logical_to_phys(scmd_channel(cmd)); 965 bus = aac_logical_to_phys(scmd_channel(cmd));
964 cid = scmd_id(cmd); 966 cid = scmd_id(cmd);
965 info = &aac->hba_map[bus][cid]; 967
966 if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS || 968 if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS)
967 info->devtype != AAC_DEVTYPE_NATIVE_RAW)
968 return FAILED; 969 return FAILED;
969 970
970 if (info->reset_state > 0) 971 info = &aac->hba_map[bus][cid];
972
973 if (info->devtype != AAC_DEVTYPE_NATIVE_RAW &&
974 info->reset_state > 0)
971 return FAILED; 975 return FAILED;
972 976
973 pr_err("%s: Host adapter reset request. SCSI hang ?\n", 977 pr_err("%s: Host adapter reset request. SCSI hang ?\n",
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 48c2b2b34b72..0c9361c87ec8 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -740,6 +740,8 @@ static void aac_send_iop_reset(struct aac_dev *dev)
740 aac_set_intx_mode(dev); 740 aac_set_intx_mode(dev);
741 741
742 src_writel(dev, MUnit.IDR, IOP_SRC_RESET_MASK); 742 src_writel(dev, MUnit.IDR, IOP_SRC_RESET_MASK);
743
744 msleep(5000);
743} 745}
744 746
745static void aac_send_hardware_soft_reset(struct aac_dev *dev) 747static void aac_send_hardware_soft_reset(struct aac_dev *dev)
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 7e7ae786121b..100bc4c8798d 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -6131,6 +6131,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
6131 "Extents and RPI headers enabled.\n"); 6131 "Extents and RPI headers enabled.\n");
6132 } 6132 }
6133 mempool_free(mboxq, phba->mbox_mem_pool); 6133 mempool_free(mboxq, phba->mbox_mem_pool);
6134 rc = -EIO;
6134 goto out_free_bsmbx; 6135 goto out_free_bsmbx;
6135 } 6136 }
6136 6137
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 79ba3ce063a4..23bdb1ca106e 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -884,7 +884,7 @@ out_err:
884 wcqe->total_data_placed); 884 wcqe->total_data_placed);
885 nCmd->transferred_length = 0; 885 nCmd->transferred_length = 0;
886 nCmd->rcv_rsplen = 0; 886 nCmd->rcv_rsplen = 0;
887 nCmd->status = NVME_SC_FC_TRANSPORT_ERROR;
887 nCmd->status = NVME_SC_INTERNAL;
888 } 888 }
889 } 889 }
890 890
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index 1f59e7a74c7b..6b33a1f24f56 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -180,7 +180,7 @@ static void qla_nvme_sp_done(void *ptr, int res)
180 goto rel; 180 goto rel;
181 181
182 if (unlikely(res == QLA_FUNCTION_FAILED)) 182 if (unlikely(res == QLA_FUNCTION_FAILED))
183 fd->status = NVME_SC_FC_TRANSPORT_ERROR;
183 fd->status = NVME_SC_INTERNAL;
184 else 184 else
185 fd->status = 0; 185 fd->status = 0;
186 186
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 38942050b265..dab876c65473 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -580,7 +580,8 @@ int scsi_check_sense(struct scsi_cmnd *scmd)
580 if (sshdr.asc == 0x20 || /* Invalid command operation code */ 580 if (sshdr.asc == 0x20 || /* Invalid command operation code */
581 sshdr.asc == 0x21 || /* Logical block address out of range */ 581 sshdr.asc == 0x21 || /* Logical block address out of range */
582 sshdr.asc == 0x24 || /* Invalid field in cdb */ 582 sshdr.asc == 0x24 || /* Invalid field in cdb */
583 sshdr.asc == 0x26) { /* Parameter value invalid */
583 sshdr.asc == 0x26 || /* Parameter value invalid */
584 sshdr.asc == 0x27) { /* Write protected */
584 set_host_byte(scmd, DID_TARGET_FAILURE); 585 set_host_byte(scmd, DID_TARGET_FAILURE);
585 } 586 }
586 return SUCCESS; 587 return SUCCESS;
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index ba9d70f8a6a1..cbd4495d0ff9 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -2739,7 +2739,8 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
2739 2739
2740 list_for_each_entry(rport, &fc_host->rports, peers) { 2740 list_for_each_entry(rport, &fc_host->rports, peers) {
2741 2741
2742 if ((rport->port_state == FC_PORTSTATE_BLOCKED) &&
2743 (rport->channel == channel)) {
2742 if ((rport->port_state == FC_PORTSTATE_BLOCKED ||
2743 rport->port_state == FC_PORTSTATE_NOTPRESENT) &&
2744 (rport->channel == channel)) {
2744 2745
2745 switch (fc_host->tgtid_bind_type) { 2746 switch (fc_host->tgtid_bind_type) {
@@ -2876,7 +2877,6 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
2876 memcpy(&rport->port_name, &ids->port_name, 2877 memcpy(&rport->port_name, &ids->port_name,
2877 sizeof(rport->port_name)); 2878 sizeof(rport->port_name));
2878 rport->port_id = ids->port_id; 2879 rport->port_id = ids->port_id;
2879 rport->roles = ids->roles;
2880 rport->port_state = FC_PORTSTATE_ONLINE; 2880 rport->port_state = FC_PORTSTATE_ONLINE;
2881 rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT; 2881 rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
2882 2882
@@ -2885,15 +2885,7 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
2885 fci->f->dd_fcrport_size); 2885 fci->f->dd_fcrport_size);
2886 spin_unlock_irqrestore(shost->host_lock, flags); 2886 spin_unlock_irqrestore(shost->host_lock, flags);
2887 2887
2888 if (ids->roles & FC_PORT_ROLE_FCP_TARGET) {
2889 scsi_target_unblock(&rport->dev, SDEV_RUNNING);
2890
2891 /* initiate a scan of the target */
2892 spin_lock_irqsave(shost->host_lock, flags);
2893 rport->flags |= FC_RPORT_SCAN_PENDING;
2894 scsi_queue_work(shost, &rport->scan_work);
2895 spin_unlock_irqrestore(shost->host_lock, flags);
2896 }
2888 fc_remote_port_rolechg(rport, ids->roles);
2897 return rport; 2889 return rport;
2898 } 2890 }
2899 } 2891 }
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 8934f19bce8e..0190aeff5f7f 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -3689,7 +3689,7 @@ iscsi_if_rx(struct sk_buff *skb)
3689 uint32_t group; 3689 uint32_t group;
3690 3690
3691 nlh = nlmsg_hdr(skb); 3691 nlh = nlmsg_hdr(skb);
3692 if (nlh->nlmsg_len < sizeof(*nlh) ||
3692 if (nlh->nlmsg_len < sizeof(*nlh) + sizeof(*ev) ||
3693 skb->len < nlh->nlmsg_len) { 3693 skb->len < nlh->nlmsg_len) {
3694 break; 3694 break;
3695 } 3695 }
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index 5fbfd9cfb6d6..5b3d57fc82d3 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -169,6 +169,9 @@ static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
169static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data) 169static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
170{ 170{
171 struct pci_bar_info *bar = data; 171 struct pci_bar_info *bar = data;
172 unsigned int pos = (offset - PCI_BASE_ADDRESS_0) / 4;
173 const struct resource *res = dev->resource;
174 u32 mask;
172 175
173 if (unlikely(!bar)) { 176 if (unlikely(!bar)) {
174 pr_warn(DRV_NAME ": driver data not found for %s\n", 177 pr_warn(DRV_NAME ": driver data not found for %s\n",
@@ -179,7 +182,13 @@ static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
179 /* A write to obtain the length must happen as a 32-bit write. 182 /* A write to obtain the length must happen as a 32-bit write.
180 * This does not (yet) support writing individual bytes 183 * This does not (yet) support writing individual bytes
181 */ 184 */
182 if (value == ~0)
185 if (res[pos].flags & IORESOURCE_IO)
186 mask = ~PCI_BASE_ADDRESS_IO_MASK;
187 else if (pos && (res[pos - 1].flags & IORESOURCE_MEM_64))
188 mask = 0;
189 else
190 mask = ~PCI_BASE_ADDRESS_MEM_MASK;
191 if ((value | mask) == ~0U)
183 bar->which = 1; 192 bar->which = 1;
184 else { 193 else {
185 u32 tmpval; 194 u32 tmpval;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b51d23f5cafa..280384bf34f1 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -107,7 +107,8 @@ static void end_compressed_bio_read(struct bio *bio)
107 struct inode *inode; 107 struct inode *inode;
108 struct page *page; 108 struct page *page;
109 unsigned long index; 109 unsigned long index;
110 int ret;
110 unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
111 int ret = 0;
111 112
112 if (bio->bi_status) 113 if (bio->bi_status)
113 cb->errors = 1; 114 cb->errors = 1;
@@ -118,6 +119,21 @@ static void end_compressed_bio_read(struct bio *bio)
118 if (!refcount_dec_and_test(&cb->pending_bios)) 119 if (!refcount_dec_and_test(&cb->pending_bios))
119 goto out; 120 goto out;
120 121
122 /*
123 * Record the correct mirror_num in cb->orig_bio so that
124 * read-repair can work properly.
125 */
126 ASSERT(btrfs_io_bio(cb->orig_bio));
127 btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
128 cb->mirror_num = mirror;
129
130 /*
131 * Some IO in this cb have failed, just skip checksum as there
132 * is no way it could be correct.
133 */
134 if (cb->errors == 1)
135 goto csum_failed;
136
121 inode = cb->inode; 137 inode = cb->inode;
122 ret = check_compressed_csum(BTRFS_I(inode), cb, 138 ret = check_compressed_csum(BTRFS_I(inode), cb,
123 (u64)bio->bi_iter.bi_sector << 9); 139 (u64)bio->bi_iter.bi_sector << 9);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5a8933da39a7..899ddaeeacec 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -709,7 +709,6 @@ struct btrfs_delayed_root;
709#define BTRFS_FS_OPEN 5 709#define BTRFS_FS_OPEN 5
710#define BTRFS_FS_QUOTA_ENABLED 6 710#define BTRFS_FS_QUOTA_ENABLED 6
711#define BTRFS_FS_QUOTA_ENABLING 7 711#define BTRFS_FS_QUOTA_ENABLING 7
712#define BTRFS_FS_QUOTA_DISABLING 8
713#define BTRFS_FS_UPDATE_UUID_TREE_GEN 9 712#define BTRFS_FS_UPDATE_UUID_TREE_GEN 9
714#define BTRFS_FS_CREATING_FREE_SPACE_TREE 10 713#define BTRFS_FS_CREATING_FREE_SPACE_TREE 10
715#define BTRFS_FS_BTREE_ERR 11 714#define BTRFS_FS_BTREE_ERR 11
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 487bbe4fb3c6..dfdab849037b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3643,7 +3643,14 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
3643 u64 flags; 3643 u64 flags;
3644 3644
3645 do_barriers = !btrfs_test_opt(fs_info, NOBARRIER); 3645 do_barriers = !btrfs_test_opt(fs_info, NOBARRIER);
3646 backup_super_roots(fs_info);
3646
3647 /*
3648 * max_mirrors == 0 indicates we're from commit_transaction,
3649 * not from fsync where the tree roots in fs_info have not
3650 * been consistent on disk.
3651 */
3652 if (max_mirrors == 0)
3653 backup_super_roots(fs_info);
3647 3654
3648 sb = fs_info->super_for_commit; 3655 sb = fs_info->super_for_commit;
3649 dev_item = &sb->dev_item; 3656 dev_item = &sb->dev_item;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e5bb0cdd3cd..12ab19a4b93e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3471,8 +3471,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3471 unsigned int write_flags = 0; 3471 unsigned int write_flags = 0;
3472 unsigned long nr_written = 0; 3472 unsigned long nr_written = 0;
3473 3473
3474 if (wbc->sync_mode == WB_SYNC_ALL)
3475 write_flags = REQ_SYNC;
3474 write_flags = wbc_to_write_flags(wbc);
3476 3475
3477 trace___extent_writepage(page, inode, wbc); 3476 trace___extent_writepage(page, inode, wbc);
3478 3477
@@ -3718,7 +3717,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3718 unsigned long i, num_pages; 3717 unsigned long i, num_pages;
3719 unsigned long bio_flags = 0; 3718 unsigned long bio_flags = 0;
3720 unsigned long start, end; 3719 unsigned long start, end;
3721 unsigned int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META;
3720 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
3722 int ret = 0; 3721 int ret = 0;
3723 3722
3724 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); 3723 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
@@ -4063,9 +4062,6 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
4063 if (epd->bio) { 4062 if (epd->bio) {
4064 int ret; 4063 int ret;
4065 4064
4066 bio_set_op_attrs(epd->bio, REQ_OP_WRITE,
4067 epd->sync_io ? REQ_SYNC : 0);
4068
4069 ret = submit_one_bio(epd->bio, 0, epd->bio_flags); 4065 ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
4070 BUG_ON(ret < 0); /* -ENOMEM */ 4066 BUG_ON(ret < 0); /* -ENOMEM */
4071 epd->bio = NULL; 4067 epd->bio = NULL;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 128f3e58634f..d94e3f68b9b1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -135,6 +135,18 @@ static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
135 const u64 offset, 135 const u64 offset,
136 const u64 bytes) 136 const u64 bytes)
137{ 137{
138 unsigned long index = offset >> PAGE_SHIFT;
139 unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
140 struct page *page;
141
142 while (index <= end_index) {
143 page = find_get_page(inode->i_mapping, index);
144 index++;
145 if (!page)
146 continue;
147 ClearPagePrivate2(page);
148 put_page(page);
149 }
138 return __endio_write_update_ordered(inode, offset + PAGE_SIZE, 150 return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
139 bytes - PAGE_SIZE, false); 151 bytes - PAGE_SIZE, false);
140} 152}
@@ -8357,11 +8369,8 @@ static void btrfs_endio_direct_read(struct bio *bio)
8357 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 8369 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
8358 blk_status_t err = bio->bi_status; 8370 blk_status_t err = bio->bi_status;
8359 8371
8360 if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) {
8361 err = btrfs_subio_endio_read(inode, io_bio, err);
8362 if (!err)
8363 bio->bi_status = 0;
8364 }
8372 if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
8373 err = btrfs_subio_endio_read(inode, io_bio, err);
8365 8374
8366 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 8375 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
8367 dip->logical_offset + dip->bytes - 1); 8376 dip->logical_offset + dip->bytes - 1);
@@ -8369,7 +8378,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
8369 8378
8370 kfree(dip); 8379 kfree(dip);
8371 8380
8372 dio_bio->bi_status = bio->bi_status;
8381 dio_bio->bi_status = err;
8373 dio_end_io(dio_bio); 8382 dio_end_io(dio_bio);
8374 8383
8375 if (io_bio->end_io) 8384 if (io_bio->end_io)
@@ -8387,6 +8396,7 @@ static void __endio_write_update_ordered(struct inode *inode,
8387 btrfs_work_func_t func; 8396 btrfs_work_func_t func;
8388 u64 ordered_offset = offset; 8397 u64 ordered_offset = offset;
8389 u64 ordered_bytes = bytes; 8398 u64 ordered_bytes = bytes;
8399 u64 last_offset;
8390 int ret; 8400 int ret;
8391 8401
8392 if (btrfs_is_free_space_inode(BTRFS_I(inode))) { 8402 if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
@@ -8398,6 +8408,7 @@ static void __endio_write_update_ordered(struct inode *inode,
8398 } 8408 }
8399 8409
8400again: 8410again:
8411 last_offset = ordered_offset;
8401 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, 8412 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
8402 &ordered_offset, 8413 &ordered_offset,
8403 ordered_bytes, 8414 ordered_bytes,
@@ -8409,6 +8420,12 @@ again:
8409 btrfs_queue_work(wq, &ordered->work); 8420 btrfs_queue_work(wq, &ordered->work);
8410out_test: 8421out_test:
8411 /* 8422 /*
8423 * If btrfs_dec_test_ordered_pending does not find any ordered extent
8424 * in the range, we can exit.
8425 */
8426 if (ordered_offset == last_offset)
8427 return;
8428 /*
8412 * our bio might span multiple ordered extents. If we haven't 8429 * our bio might span multiple ordered extents. If we haven't
8413 * completed the accounting for the whole dio, go back and try again 8430 * completed the accounting for the whole dio, go back and try again
8414 */ 8431 */
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d6715c2bcdc4..6c7a49faf4e0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2773,9 +2773,9 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
2773 } 2773 }
2774 mutex_unlock(&fs_devices->device_list_mutex); 2774 mutex_unlock(&fs_devices->device_list_mutex);
2775 2775
2776 fi_args->nodesize = fs_info->super_copy->nodesize;
2777 fi_args->sectorsize = fs_info->super_copy->sectorsize;
2778 fi_args->clone_alignment = fs_info->super_copy->sectorsize;
2776 fi_args->nodesize = fs_info->nodesize;
2777 fi_args->sectorsize = fs_info->sectorsize;
2778 fi_args->clone_alignment = fs_info->sectorsize;
2779 2779
2780 if (copy_to_user(arg, fi_args, sizeof(*fi_args))) 2780 if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
2781 ret = -EFAULT; 2781 ret = -EFAULT;
@@ -3032,7 +3032,7 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
3032out: 3032out:
3033 if (ret) 3033 if (ret)
3034 btrfs_cmp_data_free(cmp); 3034 btrfs_cmp_data_free(cmp);
3035 return 0;
3035 return ret;
3036} 3036}
3037 3037
3038static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp) 3038static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp)
@@ -4061,6 +4061,10 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
4061 ret = PTR_ERR(new_root); 4061 ret = PTR_ERR(new_root);
4062 goto out; 4062 goto out;
4063 } 4063 }
4064 if (!is_fstree(new_root->objectid)) {
4065 ret = -ENOENT;
4066 goto out;
4067 }
4064 4068
4065 path = btrfs_alloc_path(); 4069 path = btrfs_alloc_path();
4066 if (!path) { 4070 if (!path) {
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5c8b61c86e61..e172d4843eae 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -807,7 +807,6 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
807 } 807 }
808 ret = 0; 808 ret = 0;
809out: 809out:
810 set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags);
811 btrfs_free_path(path); 810 btrfs_free_path(path);
812 return ret; 811 return ret;
813} 812}
@@ -953,7 +952,6 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
953 if (!fs_info->quota_root) 952 if (!fs_info->quota_root)
954 goto out; 953 goto out;
955 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 954 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
956 set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags);
957 btrfs_qgroup_wait_for_completion(fs_info, false); 955 btrfs_qgroup_wait_for_completion(fs_info, false);
958 spin_lock(&fs_info->qgroup_lock); 956 spin_lock(&fs_info->qgroup_lock);
959 quota_root = fs_info->quota_root; 957 quota_root = fs_info->quota_root;
@@ -1307,6 +1305,8 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1307 } 1305 }
1308 } 1306 }
1309 ret = del_qgroup_item(trans, quota_root, qgroupid); 1307 ret = del_qgroup_item(trans, quota_root, qgroupid);
1308 if (ret && ret != -ENOENT)
1309 goto out;
1310 1310
1311 while (!list_empty(&qgroup->groups)) { 1311 while (!list_empty(&qgroup->groups)) {
1312 list = list_first_entry(&qgroup->groups, 1312 list = list_first_entry(&qgroup->groups,
@@ -2086,8 +2086,6 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2086 2086
2087 if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2087 if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags))
2088 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2088 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
2089 if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags))
2090 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
2091 2089
2092 spin_lock(&fs_info->qgroup_lock); 2090 spin_lock(&fs_info->qgroup_lock);
2093 while (!list_empty(&fs_info->dirty_qgroups)) { 2091 while (!list_empty(&fs_info->dirty_qgroups)) {
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 3a49a3c2fca4..9841faef08ea 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2400,11 +2400,11 @@ void free_reloc_roots(struct list_head *list)
2400 while (!list_empty(list)) { 2400 while (!list_empty(list)) {
2401 reloc_root = list_entry(list->next, struct btrfs_root, 2401 reloc_root = list_entry(list->next, struct btrfs_root,
2402 root_list); 2402 root_list);
2403 __del_reloc_root(reloc_root);
2403 free_extent_buffer(reloc_root->node); 2404 free_extent_buffer(reloc_root->node);
2404 free_extent_buffer(reloc_root->commit_root); 2405 free_extent_buffer(reloc_root->commit_root);
2405 reloc_root->node = NULL; 2406 reloc_root->node = NULL;
2406 reloc_root->commit_root = NULL; 2407 reloc_root->commit_root = NULL;
2407 __del_reloc_root(reloc_root);
2408 } 2408 }
2409} 2409}
2410 2410
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 32b043ef8ac9..8fd195cfe81b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -2630,7 +2630,7 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino)
2630 } else { 2630 } else {
2631 btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o", 2631 btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
2632 (int)(mode & S_IFMT)); 2632 (int)(mode & S_IFMT));
2633 ret = -ENOTSUPP;
2633 ret = -EOPNOTSUPP;
2634 goto out; 2634 goto out;
2635 } 2635 }
2636 2636
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ad7f4bab640b..c800d067fcbf 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4181,6 +4181,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4181 struct extent_map *em, *n; 4181 struct extent_map *em, *n;
4182 struct list_head extents; 4182 struct list_head extents;
4183 struct extent_map_tree *tree = &inode->extent_tree; 4183 struct extent_map_tree *tree = &inode->extent_tree;
4184 u64 logged_start, logged_end;
4184 u64 test_gen; 4185 u64 test_gen;
4185 int ret = 0; 4186 int ret = 0;
4186 int num = 0; 4187 int num = 0;
@@ -4190,10 +4191,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4190 down_write(&inode->dio_sem); 4191 down_write(&inode->dio_sem);
4191 write_lock(&tree->lock); 4192 write_lock(&tree->lock);
4192 test_gen = root->fs_info->last_trans_committed; 4193 test_gen = root->fs_info->last_trans_committed;
4194 logged_start = start;
4195 logged_end = end;
4193 4196
4194 list_for_each_entry_safe(em, n, &tree->modified_extents, list) { 4197 list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
4195 list_del_init(&em->list); 4198 list_del_init(&em->list);
4196
4197 /* 4199 /*
4198 * Just an arbitrary number, this can be really CPU intensive 4200 * Just an arbitrary number, this can be really CPU intensive
4199 * once we start getting a lot of extents, and really once we 4201 * once we start getting a lot of extents, and really once we
@@ -4208,6 +4210,12 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4208 4210
4209 if (em->generation <= test_gen) 4211 if (em->generation <= test_gen)
4210 continue; 4212 continue;
4213
4214 if (em->start < logged_start)
4215 logged_start = em->start;
4216 if ((em->start + em->len - 1) > logged_end)
4217 logged_end = em->start + em->len - 1;
4218
4211 /* Need a ref to keep it from getting evicted from cache */ 4219 /* Need a ref to keep it from getting evicted from cache */
4212 refcount_inc(&em->refs); 4220 refcount_inc(&em->refs);
4213 set_bit(EXTENT_FLAG_LOGGING, &em->flags); 4221 set_bit(EXTENT_FLAG_LOGGING, &em->flags);
@@ -4216,7 +4224,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4216 } 4224 }
4217 4225
4218 list_sort(NULL, &extents, extent_cmp); 4226 list_sort(NULL, &extents, extent_cmp);
4219 btrfs_get_logged_extents(inode, logged_list, start, end);
4227 btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
4220 /* 4228 /*
4221 * Some ordered extents started by fsync might have completed 4229 * Some ordered extents started by fsync might have completed
4222 * before we could collect them into the list logged_list, which 4230 * before we could collect them into the list logged_list, which
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e8f16c305df..b39737568c22 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6166,7 +6166,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
6166 map_length = length; 6166 map_length = length;
6167 6167
6168 btrfs_bio_counter_inc_blocked(fs_info); 6168 btrfs_bio_counter_inc_blocked(fs_info);
6169 ret = __btrfs_map_block(fs_info, bio_op(bio), logical,
6169 ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
6170 &map_length, &bbio, mirror_num, 1); 6170 &map_length, &bbio, mirror_num, 1);
6171 if (ret) { 6171 if (ret) {
6172 btrfs_bio_counter_dec(fs_info); 6172 btrfs_bio_counter_dec(fs_info);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 5fa2211e49ae..62cf812ed0e5 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -229,6 +229,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
229{ 229{
230 loff_t offset = dio->iocb->ki_pos; 230 loff_t offset = dio->iocb->ki_pos;
231 ssize_t transferred = 0; 231 ssize_t transferred = 0;
232 int err;
232 233
233 /* 234 /*
234 * AIO submission can race with bio completion to get here while 235 * AIO submission can race with bio completion to get here while
@@ -258,8 +259,22 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
258 if (ret == 0) 259 if (ret == 0)
259 ret = transferred; 260 ret = transferred;
260 261
262 /*
263 * Try again to invalidate clean pages which might have been cached by
264 * non-direct readahead, or faulted in by get_user_pages() if the source
265 * of the write was an mmap'ed region of the file we're writing. Either
266 * one is a pretty crazy thing to do, so we don't support it 100%. If
267 * this invalidation fails, tough, the write still worked...
268 */
269 if (ret > 0 && dio->op == REQ_OP_WRITE &&
270 dio->inode->i_mapping->nrpages) {
271 err = invalidate_inode_pages2_range(dio->inode->i_mapping,
272 offset >> PAGE_SHIFT,
273 (offset + ret - 1) >> PAGE_SHIFT);
274 WARN_ON_ONCE(err);
275 }
276
261 if (dio->end_io) { 277 if (dio->end_io) {
262 int err;
263 278
264 // XXX: ki_pos?? 279 // XXX: ki_pos??
265 err = dio->end_io(dio->iocb, offset, ret, dio->private); 280 err = dio->end_io(dio->iocb, offset, ret, dio->private);
@@ -304,6 +319,7 @@ static void dio_bio_end_aio(struct bio *bio)
304 struct dio *dio = bio->bi_private; 319 struct dio *dio = bio->bi_private;
305 unsigned long remaining; 320 unsigned long remaining;
306 unsigned long flags; 321 unsigned long flags;
322 bool defer_completion = false;
307 323
308 /* cleanup the bio */ 324 /* cleanup the bio */
309 dio_bio_complete(dio, bio); 325 dio_bio_complete(dio, bio);
@@ -315,7 +331,19 @@ static void dio_bio_end_aio(struct bio *bio)
315 spin_unlock_irqrestore(&dio->bio_lock, flags); 331 spin_unlock_irqrestore(&dio->bio_lock, flags);
316 332
317 if (remaining == 0) { 333 if (remaining == 0) {
318 if (dio->result && dio->defer_completion) {
334 /*
335 * Defer completion when defer_completion is set or
336 * when the inode has pages mapped and this is AIO write.
337 * We need to invalidate those pages because there is a
338 * chance they contain stale data in the case buffered IO
339 * went in between AIO submission and completion into the
340 * same region.
341 */
342 if (dio->result)
343 defer_completion = dio->defer_completion ||
344 (dio->op == REQ_OP_WRITE &&
345 dio->inode->i_mapping->nrpages);
346 if (defer_completion) {
319 INIT_WORK(&dio->complete_work, dio_aio_complete_work); 347 INIT_WORK(&dio->complete_work, dio_aio_complete_work);
320 queue_work(dio->inode->i_sb->s_dio_done_wq, 348 queue_work(dio->inode->i_sb->s_dio_done_wq,
321 &dio->complete_work); 349 &dio->complete_work);
@@ -1210,10 +1238,19 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
1210 * For AIO O_(D)SYNC writes we need to defer completions to a workqueue 1238 * For AIO O_(D)SYNC writes we need to defer completions to a workqueue
1211 * so that we can call ->fsync. 1239 * so that we can call ->fsync.
1212 */ 1240 */
1213 if (dio->is_async && iov_iter_rw(iter) == WRITE &&
1214 ((iocb->ki_filp->f_flags & O_DSYNC) ||
1215 IS_SYNC(iocb->ki_filp->f_mapping->host))) {
1216 retval = dio_set_defer_completion(dio);
1241 if (dio->is_async && iov_iter_rw(iter) == WRITE) {
1242 retval = 0;
1243 if ((iocb->ki_filp->f_flags & O_DSYNC) ||
1244 IS_SYNC(iocb->ki_filp->f_mapping->host))
1245 retval = dio_set_defer_completion(dio);
1246 else if (!dio->inode->i_sb->s_dio_done_wq) {
1247 /*
1248 * In case of AIO write racing with buffered read we
1249 * need to defer completion. We can't decide this now,
1250 * however the workqueue needs to be initialized here.
1251 */
1252 retval = sb_init_dio_done_wq(dio->inode->i_sb);
1253 }
1217 if (retval) { 1254 if (retval) {
1218 /* 1255 /*
1219 * We grab i_mutex only for reads so we don't have 1256 * We grab i_mutex only for reads so we don't have
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 98e845b7841b..11066d8647d2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1945,13 +1945,9 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
1945{ 1945{
1946 struct gfs2_glock_iter *gi = seq->private; 1946 struct gfs2_glock_iter *gi = seq->private;
1947 loff_t n = *pos; 1947 loff_t n = *pos;
1948 int ret;
1949
1950 if (gi->last_pos <= *pos)
1951 n = (*pos - gi->last_pos);
1952 1948
1953 ret = rhashtable_walk_start(&gi->hti);
1954 if (ret)
1955 return NULL;
1949 rhashtable_walk_enter(&gl_hash_table, &gi->hti);
1950 if (rhashtable_walk_start(&gi->hti) != 0)
1951 return NULL;
1956 1952
1957 do { 1953 do {
@@ -1959,6 +1955,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
1959 } while (gi->gl && n--); 1955 } while (gi->gl && n--);
1960 1956
1961 gi->last_pos = *pos; 1957 gi->last_pos = *pos;
1958
1962 return gi->gl; 1959 return gi->gl;
1963} 1960}
1964 1961
@@ -1970,6 +1967,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
1970 (*pos)++; 1967 (*pos)++;
1971 gi->last_pos = *pos; 1968 gi->last_pos = *pos;
1972 gfs2_glock_iter_next(gi); 1969 gfs2_glock_iter_next(gi);
1970
1973 return gi->gl; 1971 return gi->gl;
1974} 1972}
1975 1973
@@ -1980,6 +1978,7 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
1980 1978
1981 gi->gl = NULL; 1979 gi->gl = NULL;
1982 rhashtable_walk_stop(&gi->hti); 1980 rhashtable_walk_stop(&gi->hti);
1981 rhashtable_walk_exit(&gi->hti);
1983} 1982}
1984 1983
1985static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) 1984static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
@@ -2042,12 +2041,10 @@ static int __gfs2_glocks_open(struct inode *inode, struct file *file,
2042 struct gfs2_glock_iter *gi = seq->private; 2041 struct gfs2_glock_iter *gi = seq->private;
2043 2042
2044 gi->sdp = inode->i_private; 2043 gi->sdp = inode->i_private;
2045 gi->last_pos = 0;
2046 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); 2044 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
2047 if (seq->buf) 2045 if (seq->buf)
2048 seq->size = GFS2_SEQ_GOODSIZE; 2046 seq->size = GFS2_SEQ_GOODSIZE;
2049 gi->gl = NULL; 2047 gi->gl = NULL;
2050 rhashtable_walk_enter(&gl_hash_table, &gi->hti);
2051 } 2048 }
2052 return ret; 2049 return ret;
2053} 2050}
@@ -2063,7 +2060,6 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file)
2063 struct gfs2_glock_iter *gi = seq->private; 2060 struct gfs2_glock_iter *gi = seq->private;
2064 2061
2065 gi->gl = NULL; 2062 gi->gl = NULL;
2066 rhashtable_walk_exit(&gi->hti);
2067 return seq_release_private(inode, file); 2063 return seq_release_private(inode, file);
2068} 2064}
2069 2065
diff --git a/fs/iomap.c b/fs/iomap.c
index 269b24a01f32..be61cf742b5e 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -713,8 +713,24 @@ struct iomap_dio {
713static ssize_t iomap_dio_complete(struct iomap_dio *dio) 713static ssize_t iomap_dio_complete(struct iomap_dio *dio)
714{ 714{
715 struct kiocb *iocb = dio->iocb; 715 struct kiocb *iocb = dio->iocb;
716 struct inode *inode = file_inode(iocb->ki_filp);
716 ssize_t ret; 717 ssize_t ret;
717 718
719 /*
720 * Try again to invalidate clean pages which might have been cached by
721 * non-direct readahead, or faulted in by get_user_pages() if the source
722 * of the write was an mmap'ed region of the file we're writing. Either
723 * one is a pretty crazy thing to do, so we don't support it 100%. If
724 * this invalidation fails, tough, the write still worked...
725 */
726 if (!dio->error &&
727 (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
728 ret = invalidate_inode_pages2_range(inode->i_mapping,
729 iocb->ki_pos >> PAGE_SHIFT,
730 (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
731 WARN_ON_ONCE(ret);
732 }
733
718 if (dio->end_io) { 734 if (dio->end_io) {
719 ret = dio->end_io(iocb, 735 ret = dio->end_io(iocb,
720 dio->error ? dio->error : dio->size, 736 dio->error ? dio->error : dio->size,
@@ -993,6 +1009,13 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
993 WARN_ON_ONCE(ret); 1009 WARN_ON_ONCE(ret);
994 ret = 0; 1010 ret = 0;
995 1011
1012 if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
1013 !inode->i_sb->s_dio_done_wq) {
1014 ret = sb_init_dio_done_wq(inode->i_sb);
1015 if (ret < 0)
1016 goto out_free_dio;
1017 }
1018
996 inode_dio_begin(inode); 1019 inode_dio_begin(inode);
997 1020
998 blk_start_plug(&plug); 1021 blk_start_plug(&plug);
@@ -1015,13 +1038,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1015 if (ret < 0) 1038 if (ret < 0)
1016 iomap_dio_set_error(dio, ret); 1039 iomap_dio_set_error(dio, ret);
1017 1040
1018 if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
1019 !inode->i_sb->s_dio_done_wq) {
1020 ret = sb_init_dio_done_wq(inode->i_sb);
1021 if (ret < 0)
1022 iomap_dio_set_error(dio, ret);
1023 }
1024
1025 if (!atomic_dec_and_test(&dio->ref)) { 1041 if (!atomic_dec_and_test(&dio->ref)) {
1026 if (!is_sync_kiocb(iocb)) 1042 if (!is_sync_kiocb(iocb))
1027 return -EIOCBQUEUED; 1043 return -EIOCBQUEUED;
@@ -1042,19 +1058,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1042 1058
1043 ret = iomap_dio_complete(dio); 1059 ret = iomap_dio_complete(dio);
1044 1060
1045 /*
1046 * Try again to invalidate clean pages which might have been cached by
1047 * non-direct readahead, or faulted in by get_user_pages() if the source
1048 * of the write was an mmap'ed region of the file we're writing. Either
1049 * one is a pretty crazy thing to do, so we don't support it 100%. If
1050 * this invalidation fails, tough, the write still worked...
1051 */
1052 if (iov_iter_rw(iter) == WRITE) {
1053 int err = invalidate_inode_pages2_range(mapping,
1054 start >> PAGE_SHIFT, end >> PAGE_SHIFT);
1055 WARN_ON_ONCE(err);
1056 }
1057
1058 return ret; 1061 return ret;
1059 1062
1060out_free_dio: 1063out_free_dio:
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index db692f554158..447a24d77b89 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -514,9 +514,11 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
514 if (sbi->s_fmode != ISOFS_INVALID_MODE) 514 if (sbi->s_fmode != ISOFS_INVALID_MODE)
515 seq_printf(m, ",fmode=%o", sbi->s_fmode); 515 seq_printf(m, ",fmode=%o", sbi->s_fmode);
516 516
517#ifdef CONFIG_JOLIET
517 if (sbi->s_nls_iocharset && 518 if (sbi->s_nls_iocharset &&
518 strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0) 519 strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
519 seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset); 520 seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
521#endif
520 return 0; 522 return 0;
521} 523}
522 524
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 525157ca25cb..77a8eacbe032 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -119,30 +119,25 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
119 * simple bit tests. 119 * simple bit tests.
120 */ 120 */
121static const char * const task_state_array[] = { 121static const char * const task_state_array[] = {
122 "R (running)", /* 0 */ 122
123 "S (sleeping)", /* 1 */ 123 /* states in TASK_REPORT: */
124 "D (disk sleep)", /* 2 */ 124 "R (running)", /* 0x00 */
125 "T (stopped)", /* 4 */ 125 "S (sleeping)", /* 0x01 */
126 "t (tracing stop)", /* 8 */ 126 "D (disk sleep)", /* 0x02 */
127 "X (dead)", /* 16 */ 127 "T (stopped)", /* 0x04 */
128 "Z (zombie)", /* 32 */ 128 "t (tracing stop)", /* 0x08 */
129 "X (dead)", /* 0x10 */
130 "Z (zombie)", /* 0x20 */
131 "P (parked)", /* 0x40 */
132
133 /* states beyond TASK_REPORT: */
134 "I (idle)", /* 0x80 */
129}; 135};
130 136
131static inline const char *get_task_state(struct task_struct *tsk) 137static inline const char *get_task_state(struct task_struct *tsk)
132{ 138{
133 unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT;
134
135 /*
136 * Parked tasks do not run; they sit in __kthread_parkme().
137 * Without this check, we would report them as running, which is
138 * clearly wrong, so we report them as sleeping instead.
139 */
140 if (tsk->state == TASK_PARKED)
141 state = TASK_INTERRUPTIBLE;
142
143 BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1);
144
145 return task_state_array[fls(state)];
139 BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_array));
140 return task_state_array[__get_task_state(tsk)];
146} 141}
147 142
148static inline int get_task_umask(struct task_struct *tsk) 143static inline int get_task_umask(struct task_struct *tsk)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 8381db9db6d9..50b0556a124f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1980,7 +1980,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1980 ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0, 1980 ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0,
1981 &warn_to[cnt]); 1981 &warn_to[cnt]);
1982 if (ret) { 1982 if (ret) {
1983 spin_lock(&transfer_to[cnt]->dq_dqb_lock);
1983 dquot_decr_inodes(transfer_to[cnt], inode_usage); 1984 dquot_decr_inodes(transfer_to[cnt], inode_usage);
1985 spin_unlock(&transfer_to[cnt]->dq_dqb_lock);
1984 goto over_quota; 1986 goto over_quota;
1985 } 1987 }
1986 } 1988 }
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index c0187cda2c1e..a73e5b34db41 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -328,12 +328,16 @@ static int v2_write_dquot(struct dquot *dquot)
328 if (!dquot->dq_off) { 328 if (!dquot->dq_off) {
329 alloc = true; 329 alloc = true;
330 down_write(&dqopt->dqio_sem); 330 down_write(&dqopt->dqio_sem);
331 } else {
332 down_read(&dqopt->dqio_sem);
331 } 333 }
332 ret = qtree_write_dquot( 334 ret = qtree_write_dquot(
333 sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, 335 sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv,
334 dquot); 336 dquot);
335 if (alloc) 337 if (alloc)
336 up_write(&dqopt->dqio_sem); 338 up_write(&dqopt->dqio_sem);
339 else
340 up_read(&dqopt->dqio_sem);
337 return ret; 341 return ret;
338} 342}
339 343
diff --git a/fs/read_write.c b/fs/read_write.c
index a2b9a47235c5..f0d4b16873e8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -112,7 +112,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
112 * In the generic case the entire file is data, so as long as 112 * In the generic case the entire file is data, so as long as
113 * offset isn't at the end of the file then the offset is data. 113 * offset isn't at the end of the file then the offset is data.
114 */ 114 */
115 if (offset >= eof)
115 if ((unsigned long long)offset >= eof)
116 return -ENXIO; 116 return -ENXIO;
117 break; 117 break;
118 case SEEK_HOLE: 118 case SEEK_HOLE:
@@ -120,7 +120,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
120 * There is a virtual hole at the end of the file, so as long as 120 * There is a virtual hole at the end of the file, so as long as
121 * offset isn't i_size or larger, return i_size. 121 * offset isn't i_size or larger, return i_size.
122 */ 122 */
123 if (offset >= eof)
123 if ((unsigned long long)offset >= eof)
124 return -ENXIO; 124 return -ENXIO;
125 offset = eof; 125 offset = eof;
126 break; 126 break;
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index b008ff3250eb..df3e600835e8 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -156,7 +156,8 @@ __xfs_ag_resv_free(
156 trace_xfs_ag_resv_free(pag, type, 0); 156 trace_xfs_ag_resv_free(pag, type, 0);
157 157
158 resv = xfs_perag_resv(pag, type); 158 resv = xfs_perag_resv(pag, type);
159 pag->pag_mount->m_ag_max_usable += resv->ar_asked;
159 if (pag->pag_agno == 0)
160 pag->pag_mount->m_ag_max_usable += resv->ar_asked;
160 /* 161 /*
161 * AGFL blocks are always considered "free", so whatever 162 * AGFL blocks are always considered "free", so whatever
162 * was reserved at mount time must be given back at umount. 163 * was reserved at mount time must be given back at umount.
@@ -216,7 +217,14 @@ __xfs_ag_resv_init(
216 return error; 217 return error;
217 } 218 }
218 219
219 mp->m_ag_max_usable -= ask;
220 /*
221 * Reduce the maximum per-AG allocation length by however much we're
222 * trying to reserve for an AG. Since this is a filesystem-wide
223 * counter, we only make the adjustment for AG 0. This assumes that
224 * there aren't any AGs hungrier for per-AG reservation than AG 0.
225 */
226 if (pag->pag_agno == 0)
227 mp->m_ag_max_usable -= ask;
220 228
221 resv = xfs_perag_resv(pag, type); 229 resv = xfs_perag_resv(pag, type);
222 resv->ar_asked = ask; 230 resv->ar_asked = ask;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 459f4b4f08fe..044a363119be 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -49,7 +49,6 @@
49#include "xfs_rmap.h" 49#include "xfs_rmap.h"
50#include "xfs_ag_resv.h" 50#include "xfs_ag_resv.h"
51#include "xfs_refcount.h" 51#include "xfs_refcount.h"
52#include "xfs_rmap_btree.h"
53#include "xfs_icache.h" 52#include "xfs_icache.h"
54 53
55 54
@@ -192,12 +191,8 @@ xfs_bmap_worst_indlen(
192 int maxrecs; /* maximum record count at this level */ 191 int maxrecs; /* maximum record count at this level */
193 xfs_mount_t *mp; /* mount structure */ 192 xfs_mount_t *mp; /* mount structure */
194 xfs_filblks_t rval; /* return value */ 193 xfs_filblks_t rval; /* return value */
195 xfs_filblks_t orig_len;
196 194
197 mp = ip->i_mount; 195 mp = ip->i_mount;
198
199 /* Calculate the worst-case size of the bmbt. */
200 orig_len = len;
201 maxrecs = mp->m_bmap_dmxr[0]; 196 maxrecs = mp->m_bmap_dmxr[0];
202 for (level = 0, rval = 0; 197 for (level = 0, rval = 0;
203 level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); 198 level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
@@ -205,20 +200,12 @@ xfs_bmap_worst_indlen(
205 len += maxrecs - 1; 200 len += maxrecs - 1;
206 do_div(len, maxrecs); 201 do_div(len, maxrecs);
207 rval += len; 202 rval += len;
208 if (len == 1) {
209 rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
210 level - 1;
211 break;
212 }
203 if (len == 1)
204 return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
205 level - 1;
213 if (level == 0) 206 if (level == 0)
214 maxrecs = mp->m_bmap_dmxr[1]; 207 maxrecs = mp->m_bmap_dmxr[1];
215 } 208 }
216
217 /* Calculate the worst-case size of the rmapbt. */
218 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
219 rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) +
220 mp->m_rmap_maxlevels;
221
222 return rval; 209 return rval;
223} 210}
224 211
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 29172609f2a3..f18e5932aec4 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -343,7 +343,8 @@ xfs_end_io(
343 error = xfs_reflink_end_cow(ip, offset, size); 343 error = xfs_reflink_end_cow(ip, offset, size);
344 break; 344 break;
345 case XFS_IO_UNWRITTEN: 345 case XFS_IO_UNWRITTEN:
346 error = xfs_iomap_write_unwritten(ip, offset, size); 346 /* writeback should never update isize */
347 error = xfs_iomap_write_unwritten(ip, offset, size, false);
347 break; 348 break;
348 default: 349 default:
349 ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); 350 ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index cd9a5400ba4f..bc6c6e10a969 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1459,7 +1459,19 @@ xfs_shift_file_space(
1459 return error; 1459 return error;
1460 1460
1461 /* 1461 /*
1462 * The extent shiting code works on extent granularity. So, if 1462 * Clean out anything hanging around in the cow fork now that
1463 * we've flushed all the dirty data out to disk to avoid having
1464 * CoW extents at the wrong offsets.
1465 */
1466 if (xfs_is_reflink_inode(ip)) {
1467 error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
1468 true);
1469 if (error)
1470 return error;
1471 }
1472
1473 /*
1474 * The extent shifting code works on extent granularity. So, if
1463 * stop_fsb is not the starting block of extent, we need to split 1475 * stop_fsb is not the starting block of extent, we need to split
1464 * the extent at stop_fsb. 1476 * the extent at stop_fsb.
1465 */ 1477 */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index da14658da310..2f97c12ca75e 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1258,8 +1258,6 @@ xfs_buf_ioapply_map(
1258 int size; 1258 int size;
1259 int offset; 1259 int offset;
1260 1260
1261 total_nr_pages = bp->b_page_count;
1262
1263 /* skip the pages in the buffer before the start offset */ 1261 /* skip the pages in the buffer before the start offset */
1264 page_index = 0; 1262 page_index = 0;
1265 offset = *buf_offset; 1263 offset = *buf_offset;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index bd786a9ac2c3..eaf86f55b7f2 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -347,7 +347,7 @@ xfs_verifier_error(
347{ 347{
348 struct xfs_mount *mp = bp->b_target->bt_mount; 348 struct xfs_mount *mp = bp->b_target->bt_mount;
349 349
350 xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx", 350 xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
351 bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", 351 bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
352 __return_address, bp->b_ops->name, bp->b_bn); 352 __return_address, bp->b_ops->name, bp->b_bn);
353 353
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ebdd0bd2b261..309e26c9dddb 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -58,7 +58,7 @@ xfs_zero_range(
58 xfs_off_t count, 58 xfs_off_t count,
59 bool *did_zero) 59 bool *did_zero)
60{ 60{
61 return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops); 61 return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
62} 62}
63 63
64int 64int
@@ -377,8 +377,6 @@ restart:
377 */ 377 */
378 spin_lock(&ip->i_flags_lock); 378 spin_lock(&ip->i_flags_lock);
379 if (iocb->ki_pos > i_size_read(inode)) { 379 if (iocb->ki_pos > i_size_read(inode)) {
380 bool zero = false;
381
382 spin_unlock(&ip->i_flags_lock); 380 spin_unlock(&ip->i_flags_lock);
383 if (!drained_dio) { 381 if (!drained_dio) {
384 if (*iolock == XFS_IOLOCK_SHARED) { 382 if (*iolock == XFS_IOLOCK_SHARED) {
@@ -399,7 +397,7 @@ restart:
399 drained_dio = true; 397 drained_dio = true;
400 goto restart; 398 goto restart;
401 } 399 }
402 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); 400 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL);
403 if (error) 401 if (error)
404 return error; 402 return error;
405 } else 403 } else
@@ -436,7 +434,6 @@ xfs_dio_write_end_io(
436 struct inode *inode = file_inode(iocb->ki_filp); 434 struct inode *inode = file_inode(iocb->ki_filp);
437 struct xfs_inode *ip = XFS_I(inode); 435 struct xfs_inode *ip = XFS_I(inode);
438 loff_t offset = iocb->ki_pos; 436 loff_t offset = iocb->ki_pos;
439 bool update_size = false;
440 int error = 0; 437 int error = 0;
441 438
442 trace_xfs_end_io_direct_write(ip, offset, size); 439 trace_xfs_end_io_direct_write(ip, offset, size);
@@ -447,6 +444,21 @@ xfs_dio_write_end_io(
447 if (size <= 0) 444 if (size <= 0)
448 return size; 445 return size;
449 446
447 if (flags & IOMAP_DIO_COW) {
448 error = xfs_reflink_end_cow(ip, offset, size);
449 if (error)
450 return error;
451 }
452
453 /*
454 * Unwritten conversion updates the in-core isize after extent
455 * conversion but before updating the on-disk size. Updating isize any
456 * earlier allows a racing dio read to find unwritten extents before
457 * they are converted.
458 */
459 if (flags & IOMAP_DIO_UNWRITTEN)
460 return xfs_iomap_write_unwritten(ip, offset, size, true);
461
450 /* 462 /*
451 * We need to update the in-core inode size here so that we don't end up 463 * We need to update the in-core inode size here so that we don't end up
452 * with the on-disk inode size being outside the in-core inode size. We 464 * with the on-disk inode size being outside the in-core inode size. We
@@ -461,20 +473,11 @@ xfs_dio_write_end_io(
461 spin_lock(&ip->i_flags_lock); 473 spin_lock(&ip->i_flags_lock);
462 if (offset + size > i_size_read(inode)) { 474 if (offset + size > i_size_read(inode)) {
463 i_size_write(inode, offset + size); 475 i_size_write(inode, offset + size);
464 update_size = true; 476 spin_unlock(&ip->i_flags_lock);
465 }
466 spin_unlock(&ip->i_flags_lock);
467
468 if (flags & IOMAP_DIO_COW) {
469 error = xfs_reflink_end_cow(ip, offset, size);
470 if (error)
471 return error;
472 }
473
474 if (flags & IOMAP_DIO_UNWRITTEN)
475 error = xfs_iomap_write_unwritten(ip, offset, size);
476 else if (update_size)
477 error = xfs_setfilesize(ip, offset, size); 477 error = xfs_setfilesize(ip, offset, size);
478 } else {
479 spin_unlock(&ip->i_flags_lock);
480 }
478 481
479 return error; 482 return error;
480} 483}
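
The reordered completion path ends any copy-on-write remapping first, lets unwritten-extent conversion bump the in-core size itself (the new update_isize argument), and only falls through to logging a new on-disk size for plain writes that extend EOF. A compressed user-space sketch of that ordering; end_cow(), convert_unwritten() and set_ondisk_size() are illustrative stubs, not the XFS helpers:

/*
 * Stand-alone sketch of the reordered DIO completion flow above; only the
 * ordering of the three steps is the point.
 */
#include <stdio.h>
#include <stdbool.h>

#define DIO_COW         0x1
#define DIO_UNWRITTEN   0x2

static long long isize = 4096;

static int end_cow(void) { puts("end CoW"); return 0; }

static int convert_unwritten(bool upd, long long end)
{
        puts("convert unwritten");
        if (upd && end > isize)
                isize = end;    /* in-core size updated during conversion */
        return 0;
}

static int set_ondisk_size(long long end) { puts("log new file size"); return 0; }

static int dio_end_io(long long offset, long long size, unsigned flags)
{
        int error;

        if (flags & DIO_COW) {
                error = end_cow();
                if (error)
                        return error;
        }
        /* Unwritten conversion handles the in-core size itself. */
        if (flags & DIO_UNWRITTEN)
                return convert_unwritten(true, offset + size);

        if (offset + size > isize) {
                isize = offset + size;
                return set_ondisk_size(offset + size);
        }
        return 0;
}

int main(void)
{
        return dio_end_io(4096, 4096, DIO_UNWRITTEN);
}
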
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5599dda4727a..4ec5b7f45401 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1624,10 +1624,12 @@ xfs_itruncate_extents(
1624 goto out; 1624 goto out;
1625 1625
1626 /* 1626 /*
1627 * Clear the reflink flag if we truncated everything. 1627 * Clear the reflink flag if there are no data fork blocks and
1628 * there are no extents staged in the cow fork.
1628 */ 1629 */
1629 if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) { 1630 if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
1630 ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; 1631 if (ip->i_d.di_nblocks == 0)
1632 ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
1631 xfs_inode_clear_cowblocks_tag(ip); 1633 xfs_inode_clear_cowblocks_tag(ip);
1632 } 1634 }
1633 1635
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6d0f74ec31e8..a705f34b58fa 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -745,7 +745,7 @@ xfs_iflush_done(
745 */ 745 */
746 iip = INODE_ITEM(blip); 746 iip = INODE_ITEM(blip);
747 if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || 747 if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
748 lip->li_flags & XFS_LI_FAILED) 748 (blip->li_flags & XFS_LI_FAILED))
749 need_ail++; 749 need_ail++;
750 750
751 blip = next; 751 blip = next;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 5049e8ab6e30..aa75389be8cf 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1088,6 +1088,7 @@ xfs_ioctl_setattr_dax_invalidate(
1088 int *join_flags) 1088 int *join_flags)
1089{ 1089{
1090 struct inode *inode = VFS_I(ip); 1090 struct inode *inode = VFS_I(ip);
1091 struct super_block *sb = inode->i_sb;
1091 int error; 1092 int error;
1092 1093
1093 *join_flags = 0; 1094 *join_flags = 0;
@@ -1100,7 +1101,7 @@ xfs_ioctl_setattr_dax_invalidate(
1100 if (fa->fsx_xflags & FS_XFLAG_DAX) { 1101 if (fa->fsx_xflags & FS_XFLAG_DAX) {
1101 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) 1102 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
1102 return -EINVAL; 1103 return -EINVAL;
1103 if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE) 1104 if (bdev_dax_supported(sb, sb->s_blocksize) < 0)
1104 return -EINVAL; 1105 return -EINVAL;
1105 } 1106 }
1106 1107
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index a1909bc064e9..f179bdf1644d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -829,7 +829,8 @@ int
829xfs_iomap_write_unwritten( 829xfs_iomap_write_unwritten(
830 xfs_inode_t *ip, 830 xfs_inode_t *ip,
831 xfs_off_t offset, 831 xfs_off_t offset,
832 xfs_off_t count) 832 xfs_off_t count,
833 bool update_isize)
833{ 834{
834 xfs_mount_t *mp = ip->i_mount; 835 xfs_mount_t *mp = ip->i_mount;
835 xfs_fileoff_t offset_fsb; 836 xfs_fileoff_t offset_fsb;
@@ -840,6 +841,7 @@ xfs_iomap_write_unwritten(
840 xfs_trans_t *tp; 841 xfs_trans_t *tp;
841 xfs_bmbt_irec_t imap; 842 xfs_bmbt_irec_t imap;
842 struct xfs_defer_ops dfops; 843 struct xfs_defer_ops dfops;
844 struct inode *inode = VFS_I(ip);
843 xfs_fsize_t i_size; 845 xfs_fsize_t i_size;
844 uint resblks; 846 uint resblks;
845 int error; 847 int error;
@@ -899,7 +901,8 @@ xfs_iomap_write_unwritten(
899 i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb); 901 i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
900 if (i_size > offset + count) 902 if (i_size > offset + count)
901 i_size = offset + count; 903 i_size = offset + count;
902 904 if (update_isize && i_size > i_size_read(inode))
905 i_size_write(inode, i_size);
903 i_size = xfs_new_eof(ip, i_size); 906 i_size = xfs_new_eof(ip, i_size);
904 if (i_size) { 907 if (i_size) {
905 ip->i_d.di_size = i_size; 908 ip->i_d.di_size = i_size;
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 00db3ecea084..ee535065c5d0 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
27 struct xfs_bmbt_irec *, int); 27 struct xfs_bmbt_irec *, int);
28int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t, 28int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
29 struct xfs_bmbt_irec *); 29 struct xfs_bmbt_irec *);
30int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); 30int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
31 31
32void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, 32void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
33 struct xfs_bmbt_irec *); 33 struct xfs_bmbt_irec *);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 2f2dc3c09ad0..4246876df7b7 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -274,7 +274,7 @@ xfs_fs_commit_blocks(
274 (end - 1) >> PAGE_SHIFT); 274 (end - 1) >> PAGE_SHIFT);
275 WARN_ON_ONCE(error); 275 WARN_ON_ONCE(error);
276 276
277 error = xfs_iomap_write_unwritten(ip, start, length); 277 error = xfs_iomap_write_unwritten(ip, start, length, false);
278 if (error) 278 if (error)
279 goto out_drop_iolock; 279 goto out_drop_iolock;
280 } 280 }
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c996f4ae4a5f..584cf2d573ba 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1654,6 +1654,16 @@ xfs_fs_fill_super(
1654 "DAX and reflink have not been tested together!"); 1654 "DAX and reflink have not been tested together!");
1655 } 1655 }
1656 1656
1657 if (mp->m_flags & XFS_MOUNT_DISCARD) {
1658 struct request_queue *q = bdev_get_queue(sb->s_bdev);
1659
1660 if (!blk_queue_discard(q)) {
1661 xfs_warn(mp, "mounting with \"discard\" option, but "
1662 "the device does not support discard");
1663 mp->m_flags &= ~XFS_MOUNT_DISCARD;
1664 }
1665 }
1666
1657 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { 1667 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
1658 if (mp->m_sb.sb_rblocks) { 1668 if (mp->m_sb.sb_rblocks) {
1659 xfs_alert(mp, 1669 xfs_alert(mp,
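
Rather than letting discards fail later, the mount path now warns and clears XFS_MOUNT_DISCARD when the underlying queue cannot discard. A small sketch of the same warn-and-downgrade pattern; queue_supports_discard() is an illustrative stub:

/*
 * Sketch of the warn-and-downgrade pattern used for the "discard" mount
 * option above; queue_supports_discard() is a stand-in for the block
 * layer query.
 */
#include <stdio.h>
#include <stdbool.h>

#define MOUNT_DISCARD 0x1

static bool queue_supports_discard(void) { return false; }

static unsigned int fixup_mount_flags(unsigned int flags)
{
        if ((flags & MOUNT_DISCARD) && !queue_supports_discard()) {
                fprintf(stderr,
                        "mounting with \"discard\", but the device does not support it\n");
                flags &= ~MOUNT_DISCARD;
        }
        return flags;
}

int main(void)
{
        printf("flags=%#x\n", fixup_mount_flags(MOUNT_DISCARD));
        return 0;
}
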
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 460294bb0fa5..02fa42d24b52 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -551,6 +551,7 @@ struct request_queue {
551 int node; 551 int node;
552#ifdef CONFIG_BLK_DEV_IO_TRACE 552#ifdef CONFIG_BLK_DEV_IO_TRACE
553 struct blk_trace *blk_trace; 553 struct blk_trace *blk_trace;
554 struct mutex blk_trace_mutex;
554#endif 555#endif
555 /* 556 /*
556 * for flush operations 557 * for flush operations
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index f24bfb2b9a2d..6d508767e144 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -3,8 +3,27 @@
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5 5
6/*
7 * CPU-up CPU-down
8 *
9 * BP AP BP AP
10 *
11 * OFFLINE OFFLINE
12 * | ^
13 * v |
14 * BRINGUP_CPU->AP_OFFLINE BRINGUP_CPU <- AP_IDLE_DEAD (idle thread/play_dead)
15 * | AP_OFFLINE
16 * v (IRQ-off) ,---------------^
17 * AP_ONLNE | (stop_machine)
18 * | TEARDOWN_CPU <- AP_ONLINE_IDLE
19 * | ^
20 * v |
21 * AP_ACTIVE AP_ACTIVE
22 */
23
6enum cpuhp_state { 24enum cpuhp_state {
7 CPUHP_OFFLINE, 25 CPUHP_INVALID = -1,
26 CPUHP_OFFLINE = 0,
8 CPUHP_CREATE_THREADS, 27 CPUHP_CREATE_THREADS,
9 CPUHP_PERF_PREPARE, 28 CPUHP_PERF_PREPARE,
10 CPUHP_PERF_X86_PREPARE, 29 CPUHP_PERF_X86_PREPARE,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a7f2ac689d29..41b8c5757859 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -167,11 +167,11 @@ struct iommu_resv_region {
167 * @map: map a physically contiguous memory region to an iommu domain 167 * @map: map a physically contiguous memory region to an iommu domain
168 * @unmap: unmap a physically contiguous memory region from an iommu domain 168 * @unmap: unmap a physically contiguous memory region from an iommu domain
169 * @map_sg: map a scatter-gather list of physically contiguous memory chunks 169 * @map_sg: map a scatter-gather list of physically contiguous memory chunks
170 * to an iommu domain
170 * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain 171 * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain
171 * @tlb_range_add: Add a given iova range to the flush queue for this domain 172 * @tlb_range_add: Add a given iova range to the flush queue for this domain
172 * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush 173 * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
173 * queue 174 * queue
174 * to an iommu domain
175 * @iova_to_phys: translate iova to physical address 175 * @iova_to_phys: translate iova to physical address
176 * @add_device: add device to iommu grouping 176 * @add_device: add device to iommu grouping
177 * @remove_device: remove device from iommu grouping 177 * @remove_device: remove device from iommu grouping
diff --git a/include/linux/key.h b/include/linux/key.h
index 044114185120..e315e16b6ff8 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -187,6 +187,7 @@ struct key {
187#define KEY_FLAG_BUILTIN 8 /* set if key is built in to the kernel */ 187#define KEY_FLAG_BUILTIN 8 /* set if key is built in to the kernel */
188#define KEY_FLAG_ROOT_CAN_INVAL 9 /* set if key can be invalidated by root without permission */ 188#define KEY_FLAG_ROOT_CAN_INVAL 9 /* set if key can be invalidated by root without permission */
189#define KEY_FLAG_KEEP 10 /* set if key should not be removed */ 189#define KEY_FLAG_KEEP 10 /* set if key should not be removed */
190#define KEY_FLAG_UID_KEYRING 11 /* set if key is a user or user session keyring */
190 191
191 /* the key type and key description string 192 /* the key type and key description string
192 * - the desc is used to match a key against search criteria 193 * - the desc is used to match a key against search criteria
@@ -243,6 +244,7 @@ extern struct key *key_alloc(struct key_type *type,
243#define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ 244#define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */
244#define KEY_ALLOC_BUILT_IN 0x0004 /* Key is built into kernel */ 245#define KEY_ALLOC_BUILT_IN 0x0004 /* Key is built into kernel */
245#define KEY_ALLOC_BYPASS_RESTRICTION 0x0008 /* Override the check on restricted keyrings */ 246#define KEY_ALLOC_BYPASS_RESTRICTION 0x0008 /* Override the check on restricted keyrings */
247#define KEY_ALLOC_UID_KEYRING 0x0010 /* allocating a user or user session keyring */
246 248
247extern void key_revoke(struct key *key); 249extern void key_revoke(struct key *key);
248extern void key_invalidate(struct key *key); 250extern void key_invalidate(struct key *key);
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 9c5cb4480806..a726f96010d5 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -346,11 +346,6 @@ struct nvme_fc_remote_port {
346 * indicating an FC transport Aborted status. 346 * indicating an FC transport Aborted status.
347 * Entrypoint is Mandatory. 347 * Entrypoint is Mandatory.
348 * 348 *
349 * @defer_rcv: Called by the transport to signal the LLLD that it has
350 * begun processing of a previously received NVME CMD IU. The LLDD
351 * is now free to re-use the rcv buffer associated with the
352 * nvmefc_tgt_fcp_req.
353 *
354 * @max_hw_queues: indicates the maximum number of hw queues the LLDD 349 * @max_hw_queues: indicates the maximum number of hw queues the LLDD
355 * supports for cpu affinitization. 350 * supports for cpu affinitization.
356 * Value is Mandatory. Must be at least 1. 351 * Value is Mandatory. Must be at least 1.
@@ -806,11 +801,19 @@ struct nvmet_fc_target_port {
806 * outstanding operation (if there was one) to complete, then will 801 * outstanding operation (if there was one) to complete, then will
807 * call the fcp_req_release() callback to return the command's 802 * call the fcp_req_release() callback to return the command's
808 * exchange context back to the LLDD. 803 * exchange context back to the LLDD.
804 * Entrypoint is Mandatory.
809 * 805 *
810 * @fcp_req_release: Called by the transport to return a nvmefc_tgt_fcp_req 806 * @fcp_req_release: Called by the transport to return a nvmefc_tgt_fcp_req
811 * to the LLDD after all operations on the fcp operation are complete. 807 * to the LLDD after all operations on the fcp operation are complete.
812 * This may be due to the command completing or upon completion of 808 * This may be due to the command completing or upon completion of
813 * abort cleanup. 809 * abort cleanup.
810 * Entrypoint is Mandatory.
811 *
812 * @defer_rcv: Called by the transport to signal the LLLD that it has
813 * begun processing of a previously received NVME CMD IU. The LLDD
814 * is now free to re-use the rcv buffer associated with the
815 * nvmefc_tgt_fcp_req.
816 * Entrypoint is Optional.
814 * 817 *
815 * @max_hw_queues: indicates the maximum number of hw queues the LLDD 818 * @max_hw_queues: indicates the maximum number of hw queues the LLDD
816 * supports for cpu affinitization. 819 * supports for cpu affinitization.
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 87723c86f136..9310ce77d8e1 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -471,12 +471,14 @@ enum nvme_opcode {
471 * 471 *
472 * @NVME_SGL_FMT_ADDRESS: absolute address of the data block 472 * @NVME_SGL_FMT_ADDRESS: absolute address of the data block
473 * @NVME_SGL_FMT_OFFSET: relative offset of the in-capsule data block 473 * @NVME_SGL_FMT_OFFSET: relative offset of the in-capsule data block
474 * @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA
474 * @NVME_SGL_FMT_INVALIDATE: RDMA transport specific remote invalidation 475 * @NVME_SGL_FMT_INVALIDATE: RDMA transport specific remote invalidation
475 * request subtype 476 * request subtype
476 */ 477 */
477enum { 478enum {
478 NVME_SGL_FMT_ADDRESS = 0x00, 479 NVME_SGL_FMT_ADDRESS = 0x00,
479 NVME_SGL_FMT_OFFSET = 0x01, 480 NVME_SGL_FMT_OFFSET = 0x01,
481 NVME_SGL_FMT_TRANSPORT_A = 0x0A,
480 NVME_SGL_FMT_INVALIDATE = 0x0f, 482 NVME_SGL_FMT_INVALIDATE = 0x0f,
481}; 483};
482 484
@@ -490,12 +492,16 @@ enum {
490 * 492 *
491 * For struct nvme_keyed_sgl_desc: 493 * For struct nvme_keyed_sgl_desc:
492 * @NVME_KEY_SGL_FMT_DATA_DESC: keyed data block descriptor 494 * @NVME_KEY_SGL_FMT_DATA_DESC: keyed data block descriptor
495 *
496 * Transport-specific SGL types:
497 * @NVME_TRANSPORT_SGL_DATA_DESC: Transport SGL data dlock descriptor
493 */ 498 */
494enum { 499enum {
495 NVME_SGL_FMT_DATA_DESC = 0x00, 500 NVME_SGL_FMT_DATA_DESC = 0x00,
496 NVME_SGL_FMT_SEG_DESC = 0x02, 501 NVME_SGL_FMT_SEG_DESC = 0x02,
497 NVME_SGL_FMT_LAST_SEG_DESC = 0x03, 502 NVME_SGL_FMT_LAST_SEG_DESC = 0x03,
498 NVME_KEY_SGL_FMT_DATA_DESC = 0x04, 503 NVME_KEY_SGL_FMT_DATA_DESC = 0x04,
504 NVME_TRANSPORT_SGL_DATA_DESC = 0x05,
499}; 505};
500 506
501struct nvme_sgl_desc { 507struct nvme_sgl_desc {
@@ -1127,19 +1133,6 @@ enum {
1127 NVME_SC_UNWRITTEN_BLOCK = 0x287, 1133 NVME_SC_UNWRITTEN_BLOCK = 0x287,
1128 1134
1129 NVME_SC_DNR = 0x4000, 1135 NVME_SC_DNR = 0x4000,
1130
1131
1132 /*
1133 * FC Transport-specific error status values for NVME commands
1134 *
1135 * Transport-specific status code values must be in the range 0xB0..0xBF
1136 */
1137
1138 /* Generic FC failure - catchall */
1139 NVME_SC_FC_TRANSPORT_ERROR = 0x00B0,
1140
1141 /* I/O failure due to FC ABTS'd */
1142 NVME_SC_FC_TRANSPORT_ABORTED = 0x00B1,
1143}; 1136};
1144 1137
1145struct nvme_completion { 1138struct nvme_completion {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f68c58a93dd0..f4f8ee5a7362 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1685,6 +1685,8 @@ static inline int pci_get_new_domain_nr(void) { return -ENOSYS; }
1685 1685
1686#define dev_is_pci(d) (false) 1686#define dev_is_pci(d) (false)
1687#define dev_is_pf(d) (false) 1687#define dev_is_pf(d) (false)
1688static inline bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags)
1689{ return false; }
1688#endif /* CONFIG_PCI */ 1690#endif /* CONFIG_PCI */
1689 1691
1690/* Include architecture-dependent settings and functions */ 1692/* Include architecture-dependent settings and functions */
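
The !CONFIG_PCI branch grows a pci_acs_enabled() stub so callers stay free of ifdefs: the disabled configuration simply answers false inline. A sketch of that stub-when-disabled pattern with an invented CONFIG_FEATURE/feature_enabled() pair:

/*
 * Sketch of the stub-when-disabled pattern above; CONFIG_FEATURE and
 * feature_enabled() are invented names, not a real kernel interface.
 */
#include <stdio.h>
#include <stdbool.h>

/* #define CONFIG_FEATURE */

#ifdef CONFIG_FEATURE
bool feature_enabled(int dev);          /* real implementation elsewhere */
#else
static inline bool feature_enabled(int dev) { return false; }
#endif

int main(void)
{
        /* Callers compile unchanged either way. */
        printf("%d\n", feature_enabled(0));
        return 0;
}
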
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 92fb8dd5a9e4..26a7df4e558c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -65,25 +65,23 @@ struct task_group;
65 */ 65 */
66 66
67/* Used in tsk->state: */ 67/* Used in tsk->state: */
68#define TASK_RUNNING 0 68#define TASK_RUNNING 0x0000
69#define TASK_INTERRUPTIBLE 1 69#define TASK_INTERRUPTIBLE 0x0001
70#define TASK_UNINTERRUPTIBLE 2 70#define TASK_UNINTERRUPTIBLE 0x0002
71#define __TASK_STOPPED 4 71#define __TASK_STOPPED 0x0004
72#define __TASK_TRACED 8 72#define __TASK_TRACED 0x0008
73/* Used in tsk->exit_state: */ 73/* Used in tsk->exit_state: */
74#define EXIT_DEAD 16 74#define EXIT_DEAD 0x0010
75#define EXIT_ZOMBIE 32 75#define EXIT_ZOMBIE 0x0020
76#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) 76#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
77/* Used in tsk->state again: */ 77/* Used in tsk->state again: */
78#define TASK_DEAD 64 78#define TASK_PARKED 0x0040
79#define TASK_WAKEKILL 128 79#define TASK_DEAD 0x0080
80#define TASK_WAKING 256 80#define TASK_WAKEKILL 0x0100
81#define TASK_PARKED 512 81#define TASK_WAKING 0x0200
82#define TASK_NOLOAD 1024 82#define TASK_NOLOAD 0x0400
83#define TASK_NEW 2048 83#define TASK_NEW 0x0800
84#define TASK_STATE_MAX 4096 84#define TASK_STATE_MAX 0x1000
85
86#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn"
87 85
88/* Convenience macros for the sake of set_current_state: */ 86/* Convenience macros for the sake of set_current_state: */
89#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) 87#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@@ -99,7 +97,8 @@ struct task_group;
99/* get_task_state(): */ 97/* get_task_state(): */
100#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ 98#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \
101 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ 99 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
102 __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) 100 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
101 TASK_PARKED)
103 102
104#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) 103#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
105 104
@@ -1243,17 +1242,34 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
1243 return task_pgrp_nr_ns(tsk, &init_pid_ns); 1242 return task_pgrp_nr_ns(tsk, &init_pid_ns);
1244} 1243}
1245 1244
1246static inline char task_state_to_char(struct task_struct *task) 1245#define TASK_REPORT_IDLE (TASK_REPORT + 1)
1246#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
1247
1248static inline unsigned int __get_task_state(struct task_struct *tsk)
1249{
1250 unsigned int tsk_state = READ_ONCE(tsk->state);
1251 unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
1252
1253 BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
1254
1255 if (tsk_state == TASK_IDLE)
1256 state = TASK_REPORT_IDLE;
1257
1258 return fls(state);
1259}
1260
1261static inline char __task_state_to_char(unsigned int state)
1247{ 1262{
1248 const char stat_nam[] = TASK_STATE_TO_CHAR_STR; 1263 static const char state_char[] = "RSDTtXZPI";
1249 unsigned long state = task->state;
1250 1264
1251 state = state ? __ffs(state) + 1 : 0; 1265 BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1);
1252 1266
1253 /* Make sure the string lines up properly with the number of task states: */ 1267 return state_char[state];
1254 BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1); 1268}
1255 1269
1256 return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'; 1270static inline char task_state_to_char(struct task_struct *tsk)
1271{
1272 return __task_state_to_char(__get_task_state(tsk));
1257} 1273}
1258 1274
1259/** 1275/**
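
With the task-state bits turned into hex masks, reporting now takes fls() of the masked state and indexes the string "RSDTtXZPI", with the synthetic TASK_REPORT_IDLE bit mapping idle kthreads to 'I'. A user-space sketch of that mapping, emulating fls() with __builtin_clz():

/*
 * User-space sketch of the fls()-based state-to-letter mapping above.
 * The values mirror the new hex masks; fls() is emulated for user space.
 */
#include <stdio.h>

#define TASK_RUNNING            0x0000
#define TASK_INTERRUPTIBLE      0x0001
#define TASK_UNINTERRUPTIBLE    0x0002
#define __TASK_STOPPED          0x0004
#define __TASK_TRACED           0x0008
#define EXIT_DEAD               0x0010
#define EXIT_ZOMBIE             0x0020
#define TASK_PARKED             0x0040
#define TASK_REPORT_IDLE        0x0080  /* synthetic: TASK_REPORT + 1 */

static int fls(unsigned int x)
{
        return x ? 32 - __builtin_clz(x) : 0;
}

static char state_to_char(unsigned int state)
{
        static const char state_char[] = "RSDTtXZPI";

        return state_char[fls(state)];
}

int main(void)
{
        printf("%c %c %c %c\n",
               state_to_char(TASK_RUNNING),             /* R */
               state_to_char(TASK_UNINTERRUPTIBLE),     /* D */
               state_to_char(EXIT_ZOMBIE),              /* Z */
               state_to_char(TASK_REPORT_IDLE));        /* I */
        return 0;
}
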
diff --git a/include/linux/timer.h b/include/linux/timer.h
index e6789b8757d5..6383c528b148 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -168,6 +168,20 @@ static inline void init_timer_on_stack_key(struct timer_list *timer,
168#define setup_pinned_deferrable_timer_on_stack(timer, fn, data) \ 168#define setup_pinned_deferrable_timer_on_stack(timer, fn, data) \
169 __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED) 169 __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED)
170 170
171#define TIMER_DATA_TYPE unsigned long
172#define TIMER_FUNC_TYPE void (*)(TIMER_DATA_TYPE)
173
174static inline void timer_setup(struct timer_list *timer,
175 void (*callback)(struct timer_list *),
176 unsigned int flags)
177{
178 __setup_timer(timer, (TIMER_FUNC_TYPE)callback,
179 (TIMER_DATA_TYPE)timer, flags);
180}
181
182#define from_timer(var, callback_timer, timer_fieldname) \
183 container_of(callback_timer, typeof(*var), timer_fieldname)
184
171/** 185/**
172 * timer_pending - is a timer pending? 186 * timer_pending - is a timer pending?
173 * @timer: the timer in question 187 * @timer: the timer in question
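
timer_setup() records the timer itself as the callback argument and from_timer() recovers the containing object with container_of(), so callbacks can take a struct timer_list * instead of an unsigned long. A user-space sketch of the pattern; struct my_dev and the cut-down struct timer_list are illustrative:

/*
 * User-space sketch of the timer_setup()/from_timer() pattern above,
 * built around container_of(); struct my_dev is an invented example.
 */
#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct timer_list {
        void (*function)(struct timer_list *);
};

#define from_timer(var, callback_timer, timer_fieldname) \
        container_of(callback_timer, typeof(*var), timer_fieldname)

struct my_dev {
        int id;
        struct timer_list timer;
};

static void my_dev_timeout(struct timer_list *t)
{
        struct my_dev *dev = from_timer(dev, t, timer);

        printf("timeout on dev %d\n", dev->id);
}

int main(void)
{
        struct my_dev dev = { .id = 42 };

        dev.timer.function = my_dev_timeout;    /* what timer_setup() records */
        dev.timer.function(&dev.timer);         /* what the timer core ends up calling */
        return 0;
}
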
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index bdb1279a415b..e8608b2dc844 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -285,7 +285,7 @@ enum ib_tm_cap_flags {
285 IB_TM_CAP_RC = 1 << 0, 285 IB_TM_CAP_RC = 1 << 0,
286}; 286};
287 287
288struct ib_xrq_caps { 288struct ib_tm_caps {
289 /* Max size of RNDV header */ 289 /* Max size of RNDV header */
290 u32 max_rndv_hdr_size; 290 u32 max_rndv_hdr_size;
291 /* Max number of entries in tag matching list */ 291 /* Max number of entries in tag matching list */
@@ -358,7 +358,7 @@ struct ib_device_attr {
358 struct ib_rss_caps rss_caps; 358 struct ib_rss_caps rss_caps;
359 u32 max_wq_type_rq; 359 u32 max_wq_type_rq;
360 u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */ 360 u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */
361 struct ib_xrq_caps xrq_caps; 361 struct ib_tm_caps tm_caps;
362}; 362};
363 363
364enum ib_mtu { 364enum ib_mtu {
@@ -1739,7 +1739,7 @@ struct ib_mr {
1739 u32 lkey; 1739 u32 lkey;
1740 u32 rkey; 1740 u32 rkey;
1741 u64 iova; 1741 u64 iova;
1742 u32 length; 1742 u64 length;
1743 unsigned int page_size; 1743 unsigned int page_size;
1744 bool need_inval; 1744 bool need_inval;
1745 union { 1745 union {
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index ae1409ffe99a..3c8b7f625670 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -114,7 +114,10 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
114 * Preemption ignores task state, therefore preempted tasks are always 114 * Preemption ignores task state, therefore preempted tasks are always
115 * RUNNING (we will not have dequeued if state != RUNNING). 115 * RUNNING (we will not have dequeued if state != RUNNING).
116 */ 116 */
117 return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state; 117 if (preempt)
118 return TASK_STATE_MAX;
119
120 return __get_task_state(p);
118} 121}
119#endif /* CREATE_TRACE_POINTS */ 122#endif /* CREATE_TRACE_POINTS */
120 123
@@ -152,12 +155,14 @@ TRACE_EVENT(sched_switch,
152 155
153 TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", 156 TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
154 __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, 157 __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
155 __entry->prev_state & (TASK_STATE_MAX-1) ? 158
156 __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", 159 (__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
157 { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, 160 __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
158 { 16, "Z" }, { 32, "X" }, { 64, "x" }, 161 { 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" },
159 { 128, "K" }, { 256, "W" }, { 512, "P" }, 162 { 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" },
160 { 1024, "N" }) : "R", 163 { 0x40, "P" }, { 0x80, "I" }) :
164 "R",
165
161 __entry->prev_state & TASK_STATE_MAX ? "+" : "", 166 __entry->prev_state & TASK_STATE_MAX ? "+" : "",
162 __entry->next_comm, __entry->next_pid, __entry->next_prio) 167 __entry->next_comm, __entry->next_pid, __entry->next_prio)
163); 168);
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 9a0b6479fe0c..d4e0b53bfc75 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -261,7 +261,7 @@ struct ib_uverbs_ex_query_device_resp {
261 struct ib_uverbs_rss_caps rss_caps; 261 struct ib_uverbs_rss_caps rss_caps;
262 __u32 max_wq_type_rq; 262 __u32 max_wq_type_rq;
263 __u32 raw_packet_caps; 263 __u32 raw_packet_caps;
264 struct ib_uverbs_tm_caps xrq_caps; 264 struct ib_uverbs_tm_caps tm_caps;
265}; 265};
266 266
267struct ib_uverbs_query_port { 267struct ib_uverbs_query_port {
diff --git a/ipc/shm.c b/ipc/shm.c
index 1e2b1692ba2c..badac463e2c8 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1154,7 +1154,7 @@ static int put_compat_shm_info(struct shm_info *ip,
1154 info.shm_swp = ip->shm_swp; 1154 info.shm_swp = ip->shm_swp;
1155 info.swap_attempts = ip->swap_attempts; 1155 info.swap_attempts = ip->swap_attempts;
1156 info.swap_successes = ip->swap_successes; 1156 info.swap_successes = ip->swap_successes;
1157 return copy_to_user(up, &info, sizeof(info)); 1157 return copy_to_user(uip, &info, sizeof(info));
1158} 1158}
1159 1159
1160static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in, 1160static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
diff --git a/kernel/cpu.c b/kernel/cpu.c
index acf5308fad51..8de11a29e495 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -46,11 +46,13 @@
46 * @bringup: Single callback bringup or teardown selector 46 * @bringup: Single callback bringup or teardown selector
47 * @cb_state: The state for a single callback (install/uninstall) 47 * @cb_state: The state for a single callback (install/uninstall)
48 * @result: Result of the operation 48 * @result: Result of the operation
49 * @done: Signal completion to the issuer of the task 49 * @done_up: Signal completion to the issuer of the task for cpu-up
50 * @done_down: Signal completion to the issuer of the task for cpu-down
50 */ 51 */
51struct cpuhp_cpu_state { 52struct cpuhp_cpu_state {
52 enum cpuhp_state state; 53 enum cpuhp_state state;
53 enum cpuhp_state target; 54 enum cpuhp_state target;
55 enum cpuhp_state fail;
54#ifdef CONFIG_SMP 56#ifdef CONFIG_SMP
55 struct task_struct *thread; 57 struct task_struct *thread;
56 bool should_run; 58 bool should_run;
@@ -58,18 +60,39 @@ struct cpuhp_cpu_state {
58 bool single; 60 bool single;
59 bool bringup; 61 bool bringup;
60 struct hlist_node *node; 62 struct hlist_node *node;
63 struct hlist_node *last;
61 enum cpuhp_state cb_state; 64 enum cpuhp_state cb_state;
62 int result; 65 int result;
63 struct completion done; 66 struct completion done_up;
67 struct completion done_down;
64#endif 68#endif
65}; 69};
66 70
67static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state); 71static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
72 .fail = CPUHP_INVALID,
73};
68 74
69#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP) 75#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
70static struct lock_class_key cpuhp_state_key; 76static struct lockdep_map cpuhp_state_up_map =
71static struct lockdep_map cpuhp_state_lock_map = 77 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
72 STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key); 78static struct lockdep_map cpuhp_state_down_map =
79 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
80
81
82static void inline cpuhp_lock_acquire(bool bringup)
83{
84 lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
85}
86
87static void inline cpuhp_lock_release(bool bringup)
88{
89 lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
90}
91#else
92
93static void inline cpuhp_lock_acquire(bool bringup) { }
94static void inline cpuhp_lock_release(bool bringup) { }
95
73#endif 96#endif
74 97
75/** 98/**
@@ -123,13 +146,16 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
123/** 146/**
124 * cpuhp_invoke_callback _ Invoke the callbacks for a given state 147 * cpuhp_invoke_callback _ Invoke the callbacks for a given state
125 * @cpu: The cpu for which the callback should be invoked 148 * @cpu: The cpu for which the callback should be invoked
126 * @step: The step in the state machine 149 * @state: The state to do callbacks for
127 * @bringup: True if the bringup callback should be invoked 150 * @bringup: True if the bringup callback should be invoked
151 * @node: For multi-instance, do a single entry callback for install/remove
152 * @lastp: For multi-instance rollback, remember how far we got
128 * 153 *
129 * Called from cpu hotplug and from the state register machinery. 154 * Called from cpu hotplug and from the state register machinery.
130 */ 155 */
131static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, 156static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
132 bool bringup, struct hlist_node *node) 157 bool bringup, struct hlist_node *node,
158 struct hlist_node **lastp)
133{ 159{
134 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); 160 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
135 struct cpuhp_step *step = cpuhp_get_step(state); 161 struct cpuhp_step *step = cpuhp_get_step(state);
@@ -137,7 +163,17 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
137 int (*cb)(unsigned int cpu); 163 int (*cb)(unsigned int cpu);
138 int ret, cnt; 164 int ret, cnt;
139 165
166 if (st->fail == state) {
167 st->fail = CPUHP_INVALID;
168
169 if (!(bringup ? step->startup.single : step->teardown.single))
170 return 0;
171
172 return -EAGAIN;
173 }
174
140 if (!step->multi_instance) { 175 if (!step->multi_instance) {
176 WARN_ON_ONCE(lastp && *lastp);
141 cb = bringup ? step->startup.single : step->teardown.single; 177 cb = bringup ? step->startup.single : step->teardown.single;
142 if (!cb) 178 if (!cb)
143 return 0; 179 return 0;
@@ -152,6 +188,7 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
152 188
153 /* Single invocation for instance add/remove */ 189 /* Single invocation for instance add/remove */
154 if (node) { 190 if (node) {
191 WARN_ON_ONCE(lastp && *lastp);
155 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); 192 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
156 ret = cbm(cpu, node); 193 ret = cbm(cpu, node);
157 trace_cpuhp_exit(cpu, st->state, state, ret); 194 trace_cpuhp_exit(cpu, st->state, state, ret);
@@ -161,13 +198,23 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
161 /* State transition. Invoke on all instances */ 198 /* State transition. Invoke on all instances */
162 cnt = 0; 199 cnt = 0;
163 hlist_for_each(node, &step->list) { 200 hlist_for_each(node, &step->list) {
201 if (lastp && node == *lastp)
202 break;
203
164 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); 204 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
165 ret = cbm(cpu, node); 205 ret = cbm(cpu, node);
166 trace_cpuhp_exit(cpu, st->state, state, ret); 206 trace_cpuhp_exit(cpu, st->state, state, ret);
167 if (ret) 207 if (ret) {
168 goto err; 208 if (!lastp)
209 goto err;
210
211 *lastp = node;
212 return ret;
213 }
169 cnt++; 214 cnt++;
170 } 215 }
216 if (lastp)
217 *lastp = NULL;
171 return 0; 218 return 0;
172err: 219err:
173 /* Rollback the instances if one failed */ 220 /* Rollback the instances if one failed */
@@ -178,12 +225,39 @@ err:
178 hlist_for_each(node, &step->list) { 225 hlist_for_each(node, &step->list) {
179 if (!cnt--) 226 if (!cnt--)
180 break; 227 break;
181 cbm(cpu, node); 228
229 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
230 ret = cbm(cpu, node);
231 trace_cpuhp_exit(cpu, st->state, state, ret);
232 /*
233 * Rollback must not fail,
234 */
235 WARN_ON_ONCE(ret);
182 } 236 }
183 return ret; 237 return ret;
184} 238}
185 239
186#ifdef CONFIG_SMP 240#ifdef CONFIG_SMP
241static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
242{
243 struct completion *done = bringup ? &st->done_up : &st->done_down;
244 wait_for_completion(done);
245}
246
247static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
248{
249 struct completion *done = bringup ? &st->done_up : &st->done_down;
250 complete(done);
251}
252
253/*
254 * The former STARTING/DYING states, ran with IRQs disabled and must not fail.
255 */
256static bool cpuhp_is_atomic_state(enum cpuhp_state state)
257{
258 return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
259}
260
187/* Serializes the updates to cpu_online_mask, cpu_present_mask */ 261/* Serializes the updates to cpu_online_mask, cpu_present_mask */
188static DEFINE_MUTEX(cpu_add_remove_lock); 262static DEFINE_MUTEX(cpu_add_remove_lock);
189bool cpuhp_tasks_frozen; 263bool cpuhp_tasks_frozen;
@@ -271,14 +345,79 @@ void cpu_hotplug_enable(void)
271EXPORT_SYMBOL_GPL(cpu_hotplug_enable); 345EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
272#endif /* CONFIG_HOTPLUG_CPU */ 346#endif /* CONFIG_HOTPLUG_CPU */
273 347
274static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st); 348static inline enum cpuhp_state
349cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
350{
351 enum cpuhp_state prev_state = st->state;
352
353 st->rollback = false;
354 st->last = NULL;
355
356 st->target = target;
357 st->single = false;
358 st->bringup = st->state < target;
359
360 return prev_state;
361}
362
363static inline void
364cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
365{
366 st->rollback = true;
367
368 /*
369 * If we have st->last we need to undo partial multi_instance of this
370 * state first. Otherwise start undo at the previous state.
371 */
372 if (!st->last) {
373 if (st->bringup)
374 st->state--;
375 else
376 st->state++;
377 }
378
379 st->target = prev_state;
380 st->bringup = !st->bringup;
381}
382
383/* Regular hotplug invocation of the AP hotplug thread */
384static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
385{
386 if (!st->single && st->state == st->target)
387 return;
388
389 st->result = 0;
390 /*
391 * Make sure the above stores are visible before should_run becomes
392 * true. Paired with the mb() above in cpuhp_thread_fun()
393 */
394 smp_mb();
395 st->should_run = true;
396 wake_up_process(st->thread);
397 wait_for_ap_thread(st, st->bringup);
398}
399
400static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
401{
402 enum cpuhp_state prev_state;
403 int ret;
404
405 prev_state = cpuhp_set_state(st, target);
406 __cpuhp_kick_ap(st);
407 if ((ret = st->result)) {
408 cpuhp_reset_state(st, prev_state);
409 __cpuhp_kick_ap(st);
410 }
411
412 return ret;
413}
275 414
276static int bringup_wait_for_ap(unsigned int cpu) 415static int bringup_wait_for_ap(unsigned int cpu)
277{ 416{
278 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); 417 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
279 418
280 /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */ 419 /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
281 wait_for_completion(&st->done); 420 wait_for_ap_thread(st, true);
282 if (WARN_ON_ONCE((!cpu_online(cpu)))) 421 if (WARN_ON_ONCE((!cpu_online(cpu))))
283 return -ECANCELED; 422 return -ECANCELED;
284 423
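
The rework funnels every hotplug failure through the same recipe: remember the state we started from, step toward the target one state at a time, and on error flip the direction and walk back to where we began (cpuhp_set_state()/cpuhp_reset_state() plus another kick of the AP thread). A user-space sketch of that advance-and-roll-back loop; invoke() and the injected failure are illustrative, not the hotplug callbacks:

/*
 * User-space sketch of the advance-and-roll-back pattern above.  The
 * state numbers, invoke() and the injected failure are all made up.
 */
#include <stdio.h>

#define NR_STATES 6

static int invoke(int state, int bringup)
{
        printf("%s state %d\n", bringup ? "bring up" : "tear down", state);
        return (bringup && state == 4) ? -1 : 0;        /* injected failure */
}

static int run(int *state, int target)
{
        const int prev = *state;
        int bringup = target > *state;
        int err = 0;

        while (*state != target) {
                int s = bringup ? ++(*state) : (*state)--;

                if (invoke(s, bringup)) {
                        err = -1;
                        if (bringup)
                                (*state)--;     /* the failed state never came up */
                        target = prev;          /* aim back at where we started */
                        bringup = !bringup;     /* and walk the other way */
                }
        }
        return err;
}

int main(void)
{
        int state = 0;

        return run(&state, NR_STATES - 1) ? 1 : 0;
}
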
@@ -286,12 +425,10 @@ static int bringup_wait_for_ap(unsigned int cpu)
286 stop_machine_unpark(cpu); 425 stop_machine_unpark(cpu);
287 kthread_unpark(st->thread); 426 kthread_unpark(st->thread);
288 427
289 /* Should we go further up ? */ 428 if (st->target <= CPUHP_AP_ONLINE_IDLE)
290 if (st->target > CPUHP_AP_ONLINE_IDLE) { 429 return 0;
291 __cpuhp_kick_ap_work(st); 430
292 wait_for_completion(&st->done); 431 return cpuhp_kick_ap(st, st->target);
293 }
294 return st->result;
295} 432}
296 433
297static int bringup_cpu(unsigned int cpu) 434static int bringup_cpu(unsigned int cpu)
@@ -317,32 +454,6 @@ static int bringup_cpu(unsigned int cpu)
317/* 454/*
318 * Hotplug state machine related functions 455 * Hotplug state machine related functions
319 */ 456 */
320static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
321{
322 for (st->state++; st->state < st->target; st->state++) {
323 struct cpuhp_step *step = cpuhp_get_step(st->state);
324
325 if (!step->skip_onerr)
326 cpuhp_invoke_callback(cpu, st->state, true, NULL);
327 }
328}
329
330static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
331 enum cpuhp_state target)
332{
333 enum cpuhp_state prev_state = st->state;
334 int ret = 0;
335
336 for (; st->state > target; st->state--) {
337 ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
338 if (ret) {
339 st->target = prev_state;
340 undo_cpu_down(cpu, st);
341 break;
342 }
343 }
344 return ret;
345}
346 457
347static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st) 458static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
348{ 459{
@@ -350,7 +461,7 @@ static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
350 struct cpuhp_step *step = cpuhp_get_step(st->state); 461 struct cpuhp_step *step = cpuhp_get_step(st->state);
351 462
352 if (!step->skip_onerr) 463 if (!step->skip_onerr)
353 cpuhp_invoke_callback(cpu, st->state, false, NULL); 464 cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
354 } 465 }
355} 466}
356 467
@@ -362,7 +473,7 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
362 473
363 while (st->state < target) { 474 while (st->state < target) {
364 st->state++; 475 st->state++;
365 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL); 476 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
366 if (ret) { 477 if (ret) {
367 st->target = prev_state; 478 st->target = prev_state;
368 undo_cpu_up(cpu, st); 479 undo_cpu_up(cpu, st);
@@ -379,7 +490,8 @@ static void cpuhp_create(unsigned int cpu)
379{ 490{
380 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); 491 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
381 492
382 init_completion(&st->done); 493 init_completion(&st->done_up);
494 init_completion(&st->done_down);
383} 495}
384 496
385static int cpuhp_should_run(unsigned int cpu) 497static int cpuhp_should_run(unsigned int cpu)
@@ -389,69 +501,90 @@ static int cpuhp_should_run(unsigned int cpu)
389 return st->should_run; 501 return st->should_run;
390} 502}
391 503
392/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
393static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
394{
395 enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
396
397 return cpuhp_down_callbacks(cpu, st, target);
398}
399
400/* Execute the online startup callbacks. Used to be CPU_ONLINE */
401static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
402{
403 return cpuhp_up_callbacks(cpu, st, st->target);
404}
405
406/* 504/*
407 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke 505 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
408 * callbacks when a state gets [un]installed at runtime. 506 * callbacks when a state gets [un]installed at runtime.
507 *
508 * Each invocation of this function by the smpboot thread does a single AP
509 * state callback.
510 *
511 * It has 3 modes of operation:
512 * - single: runs st->cb_state
513 * - up: runs ++st->state, while st->state < st->target
514 * - down: runs st->state--, while st->state > st->target
515 *
516 * When complete or on error, should_run is cleared and the completion is fired.
409 */ 517 */
410static void cpuhp_thread_fun(unsigned int cpu) 518static void cpuhp_thread_fun(unsigned int cpu)
411{ 519{
412 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); 520 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
413 int ret = 0; 521 bool bringup = st->bringup;
522 enum cpuhp_state state;
414 523
415 /* 524 /*
416 * Paired with the mb() in cpuhp_kick_ap_work and 525 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
417 * cpuhp_invoke_ap_callback, so the work set is consistent visible. 526 * that if we see ->should_run we also see the rest of the state.
418 */ 527 */
419 smp_mb(); 528 smp_mb();
420 if (!st->should_run) 529
530 if (WARN_ON_ONCE(!st->should_run))
421 return; 531 return;
422 532
423 st->should_run = false; 533 cpuhp_lock_acquire(bringup);
424 534
425 lock_map_acquire(&cpuhp_state_lock_map);
426 /* Single callback invocation for [un]install ? */
427 if (st->single) { 535 if (st->single) {
428 if (st->cb_state < CPUHP_AP_ONLINE) { 536 state = st->cb_state;
429 local_irq_disable(); 537 st->should_run = false;
430 ret = cpuhp_invoke_callback(cpu, st->cb_state, 538 } else {
431 st->bringup, st->node); 539 if (bringup) {
432 local_irq_enable(); 540 st->state++;
541 state = st->state;
542 st->should_run = (st->state < st->target);
543 WARN_ON_ONCE(st->state > st->target);
433 } else { 544 } else {
434 ret = cpuhp_invoke_callback(cpu, st->cb_state, 545 state = st->state;
435 st->bringup, st->node); 546 st->state--;
547 st->should_run = (st->state > st->target);
548 WARN_ON_ONCE(st->state < st->target);
436 } 549 }
437 } else if (st->rollback) { 550 }
438 BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE); 551
552 WARN_ON_ONCE(!cpuhp_is_ap_state(state));
439 553
440 undo_cpu_down(cpu, st); 554 if (st->rollback) {
441 st->rollback = false; 555 struct cpuhp_step *step = cpuhp_get_step(state);
556 if (step->skip_onerr)
557 goto next;
558 }
559
560 if (cpuhp_is_atomic_state(state)) {
561 local_irq_disable();
562 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
563 local_irq_enable();
564
565 /*
566 * STARTING/DYING must not fail!
567 */
568 WARN_ON_ONCE(st->result);
442 } else { 569 } else {
443 /* Cannot happen .... */ 570 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
444 BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE); 571 }
445 572
446 /* Regular hotplug work */ 573 if (st->result) {
447 if (st->state < st->target) 574 /*
448 ret = cpuhp_ap_online(cpu, st); 575 * If we fail on a rollback, we're up a creek without no
449 else if (st->state > st->target) 576 * paddle, no way forward, no way back. We loose, thanks for
450 ret = cpuhp_ap_offline(cpu, st); 577 * playing.
578 */
579 WARN_ON_ONCE(st->rollback);
580 st->should_run = false;
451 } 581 }
452 lock_map_release(&cpuhp_state_lock_map); 582
453 st->result = ret; 583next:
454 complete(&st->done); 584 cpuhp_lock_release(bringup);
585
586 if (!st->should_run)
587 complete_ap_thread(st, bringup);
455} 588}
456 589
457/* Invoke a single callback on a remote cpu */ 590/* Invoke a single callback on a remote cpu */
@@ -460,62 +593,64 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
460 struct hlist_node *node) 593 struct hlist_node *node)
461{ 594{
462 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); 595 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
596 int ret;
463 597
464 if (!cpu_online(cpu)) 598 if (!cpu_online(cpu))
465 return 0; 599 return 0;
466 600
467 lock_map_acquire(&cpuhp_state_lock_map); 601 cpuhp_lock_acquire(false);
468 lock_map_release(&cpuhp_state_lock_map); 602 cpuhp_lock_release(false);
603
604 cpuhp_lock_acquire(true);
605 cpuhp_lock_release(true);
469 606
470 /* 607 /*
471 * If we are up and running, use the hotplug thread. For early calls 608 * If we are up and running, use the hotplug thread. For early calls
472 * we invoke the thread function directly. 609 * we invoke the thread function directly.
473 */ 610 */
474 if (!st->thread) 611 if (!st->thread)
475 return cpuhp_invoke_callback(cpu, state, bringup, node); 612 return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
476 613
614 st->rollback = false;
615 st->last = NULL;
616
617 st->node = node;
618 st->bringup = bringup;
477 st->cb_state = state; 619 st->cb_state = state;
478 st->single = true; 620 st->single = true;
479 st->bringup = bringup;
480 st->node = node;
481 621
482 /* 622 __cpuhp_kick_ap(st);
483 * Make sure the above stores are visible before should_run becomes
484 * true. Paired with the mb() above in cpuhp_thread_fun()
485 */
486 smp_mb();
487 st->should_run = true;
488 wake_up_process(st->thread);
489 wait_for_completion(&st->done);
490 return st->result;
491}
492 623
493/* Regular hotplug invocation of the AP hotplug thread */
494static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
495{
496 st->result = 0;
497 st->single = false;
498 /* 624 /*
499 * Make sure the above stores are visible before should_run becomes 625 * If we failed and did a partial, do a rollback.
500 * true. Paired with the mb() above in cpuhp_thread_fun()
501 */ 626 */
502 smp_mb(); 627 if ((ret = st->result) && st->last) {
503 st->should_run = true; 628 st->rollback = true;
504 wake_up_process(st->thread); 629 st->bringup = !bringup;
630
631 __cpuhp_kick_ap(st);
632 }
633
634 return ret;
505} 635}
506 636
507static int cpuhp_kick_ap_work(unsigned int cpu) 637static int cpuhp_kick_ap_work(unsigned int cpu)
508{ 638{
509 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); 639 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
510 enum cpuhp_state state = st->state; 640 enum cpuhp_state prev_state = st->state;
641 int ret;
642
643 cpuhp_lock_acquire(false);
644 cpuhp_lock_release(false);
645
646 cpuhp_lock_acquire(true);
647 cpuhp_lock_release(true);
648
649 trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
650 ret = cpuhp_kick_ap(st, st->target);
651 trace_cpuhp_exit(cpu, st->state, prev_state, ret);
511 652
512 trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work); 653 return ret;
513 lock_map_acquire(&cpuhp_state_lock_map);
514 lock_map_release(&cpuhp_state_lock_map);
515 __cpuhp_kick_ap_work(st);
516 wait_for_completion(&st->done);
517 trace_cpuhp_exit(cpu, st->state, state, st->result);
518 return st->result;
519} 654}
520 655
521static struct smp_hotplug_thread cpuhp_threads = { 656static struct smp_hotplug_thread cpuhp_threads = {
@@ -581,6 +716,7 @@ static int take_cpu_down(void *_param)
581 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); 716 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
582 enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE); 717 enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
583 int err, cpu = smp_processor_id(); 718 int err, cpu = smp_processor_id();
719 int ret;
584 720
585 /* Ensure this CPU doesn't handle any more interrupts. */ 721 /* Ensure this CPU doesn't handle any more interrupts. */
586 err = __cpu_disable(); 722 err = __cpu_disable();
@@ -594,8 +730,13 @@ static int take_cpu_down(void *_param)
594 WARN_ON(st->state != CPUHP_TEARDOWN_CPU); 730 WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
595 st->state--; 731 st->state--;
596 /* Invoke the former CPU_DYING callbacks */ 732 /* Invoke the former CPU_DYING callbacks */
597 for (; st->state > target; st->state--) 733 for (; st->state > target; st->state--) {
598 cpuhp_invoke_callback(cpu, st->state, false, NULL); 734 ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
735 /*
736 * DYING must not fail!
737 */
738 WARN_ON_ONCE(ret);
739 }
599 740
600 /* Give up timekeeping duties */ 741 /* Give up timekeeping duties */
601 tick_handover_do_timer(); 742 tick_handover_do_timer();
@@ -639,7 +780,7 @@ static int takedown_cpu(unsigned int cpu)
639 * 780 *
640 * Wait for the stop thread to go away. 781 * Wait for the stop thread to go away.
641 */ 782 */
642 wait_for_completion(&st->done); 783 wait_for_ap_thread(st, false);
643 BUG_ON(st->state != CPUHP_AP_IDLE_DEAD); 784 BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
644 785
645 /* Interrupts are moved away from the dying cpu, reenable alloc/free */ 786 /* Interrupts are moved away from the dying cpu, reenable alloc/free */
@@ -658,7 +799,7 @@ static void cpuhp_complete_idle_dead(void *arg)
658{ 799{
659 struct cpuhp_cpu_state *st = arg; 800 struct cpuhp_cpu_state *st = arg;
660 801
661 complete(&st->done); 802 complete_ap_thread(st, false);
662} 803}
663 804
664void cpuhp_report_idle_dead(void) 805void cpuhp_report_idle_dead(void)
@@ -676,11 +817,32 @@ void cpuhp_report_idle_dead(void)
676 cpuhp_complete_idle_dead, st, 0); 817 cpuhp_complete_idle_dead, st, 0);
677} 818}
678 819
679#else 820static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
680#define takedown_cpu NULL 821{
681#endif 822 for (st->state++; st->state < st->target; st->state++) {
823 struct cpuhp_step *step = cpuhp_get_step(st->state);
682 824
683#ifdef CONFIG_HOTPLUG_CPU 825 if (!step->skip_onerr)
826 cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
827 }
828}
829
830static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
831 enum cpuhp_state target)
832{
833 enum cpuhp_state prev_state = st->state;
834 int ret = 0;
835
836 for (; st->state > target; st->state--) {
837 ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
838 if (ret) {
839 st->target = prev_state;
840 undo_cpu_down(cpu, st);
841 break;
842 }
843 }
844 return ret;
845}
684 846
685/* Requires cpu_add_remove_lock to be held */ 847/* Requires cpu_add_remove_lock to be held */
686static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, 848static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
@@ -699,13 +861,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
699 861
700 cpuhp_tasks_frozen = tasks_frozen; 862 cpuhp_tasks_frozen = tasks_frozen;
701 863
702 prev_state = st->state; 864 prev_state = cpuhp_set_state(st, target);
703 st->target = target;
704 /* 865 /*
705 * If the current CPU state is in the range of the AP hotplug thread, 866 * If the current CPU state is in the range of the AP hotplug thread,
706 * then we need to kick the thread. 867 * then we need to kick the thread.
707 */ 868 */
708 if (st->state > CPUHP_TEARDOWN_CPU) { 869 if (st->state > CPUHP_TEARDOWN_CPU) {
870 st->target = max((int)target, CPUHP_TEARDOWN_CPU);
709 ret = cpuhp_kick_ap_work(cpu); 871 ret = cpuhp_kick_ap_work(cpu);
710 /* 872 /*
711 * The AP side has done the error rollback already. Just 873 * The AP side has done the error rollback already. Just
@@ -720,6 +882,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
720 */ 882 */
721 if (st->state > CPUHP_TEARDOWN_CPU) 883 if (st->state > CPUHP_TEARDOWN_CPU)
722 goto out; 884 goto out;
885
886 st->target = target;
723 } 887 }
724 /* 888 /*
725 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need 889 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
@@ -727,9 +891,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
727 */ 891 */
728 ret = cpuhp_down_callbacks(cpu, st, target); 892 ret = cpuhp_down_callbacks(cpu, st, target);
729 if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) { 893 if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
730 st->target = prev_state; 894 cpuhp_reset_state(st, prev_state);
731 st->rollback = true; 895 __cpuhp_kick_ap(st);
732 cpuhp_kick_ap_work(cpu);
733 } 896 }
734 897
735out: 898out:
@@ -754,11 +917,15 @@ out:
754 cpu_maps_update_done(); 917 cpu_maps_update_done();
755 return err; 918 return err;
756} 919}
920
757int cpu_down(unsigned int cpu) 921int cpu_down(unsigned int cpu)
758{ 922{
759 return do_cpu_down(cpu, CPUHP_OFFLINE); 923 return do_cpu_down(cpu, CPUHP_OFFLINE);
760} 924}
761EXPORT_SYMBOL(cpu_down); 925EXPORT_SYMBOL(cpu_down);
926
927#else
928#define takedown_cpu NULL
762#endif /*CONFIG_HOTPLUG_CPU*/ 929#endif /*CONFIG_HOTPLUG_CPU*/
763 930
764/** 931/**
@@ -772,11 +939,16 @@ void notify_cpu_starting(unsigned int cpu)
772{ 939{
773 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); 940 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
774 enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE); 941 enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
942 int ret;
775 943
776 rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ 944 rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
777 while (st->state < target) { 945 while (st->state < target) {
778 st->state++; 946 st->state++;
779 cpuhp_invoke_callback(cpu, st->state, true, NULL); 947 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
948 /*
949 * STARTING must not fail!
950 */
951 WARN_ON_ONCE(ret);
780 } 952 }
781} 953}
782 954
@@ -794,7 +966,7 @@ void cpuhp_online_idle(enum cpuhp_state state)
794 return; 966 return;
795 967
796 st->state = CPUHP_AP_ONLINE_IDLE; 968 st->state = CPUHP_AP_ONLINE_IDLE;
797 complete(&st->done); 969 complete_ap_thread(st, true);
798} 970}
799 971
800/* Requires cpu_add_remove_lock to be held */ 972/* Requires cpu_add_remove_lock to be held */
@@ -829,7 +1001,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
829 1001
830 cpuhp_tasks_frozen = tasks_frozen; 1002 cpuhp_tasks_frozen = tasks_frozen;
831 1003
832 st->target = target; 1004 cpuhp_set_state(st, target);
833 /* 1005 /*
834 * If the current CPU state is in the range of the AP hotplug thread, 1006 * If the current CPU state is in the range of the AP hotplug thread,
835 * then we need to kick the thread once more. 1007 * then we need to kick the thread once more.
@@ -1296,6 +1468,10 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1296 struct cpuhp_step *sp = cpuhp_get_step(state); 1468 struct cpuhp_step *sp = cpuhp_get_step(state);
1297 int ret; 1469 int ret;
1298 1470
1471 /*
1472	 * If there's nothing to do, we're done.
1473 * Relies on the union for multi_instance.
1474 */
1299 if ((bringup && !sp->startup.single) || 1475 if ((bringup && !sp->startup.single) ||
1300 (!bringup && !sp->teardown.single)) 1476 (!bringup && !sp->teardown.single))
1301 return 0; 1477 return 0;
@@ -1307,9 +1483,9 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1307 if (cpuhp_is_ap_state(state)) 1483 if (cpuhp_is_ap_state(state))
1308 ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node); 1484 ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1309 else 1485 else
1310 ret = cpuhp_invoke_callback(cpu, state, bringup, node); 1486 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1311#else 1487#else
1312 ret = cpuhp_invoke_callback(cpu, state, bringup, node); 1488 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1313#endif 1489#endif
1314 BUG_ON(ret && !bringup); 1490 BUG_ON(ret && !bringup);
1315 return ret; 1491 return ret;
@@ -1641,9 +1817,55 @@ static ssize_t show_cpuhp_target(struct device *dev,
1641} 1817}
1642static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target); 1818static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
1643 1819
1820
1821static ssize_t write_cpuhp_fail(struct device *dev,
1822 struct device_attribute *attr,
1823 const char *buf, size_t count)
1824{
1825 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1826 struct cpuhp_step *sp;
1827 int fail, ret;
1828
1829 ret = kstrtoint(buf, 10, &fail);
1830 if (ret)
1831 return ret;
1832
1833 /*
1834 * Cannot fail STARTING/DYING callbacks.
1835 */
1836 if (cpuhp_is_atomic_state(fail))
1837 return -EINVAL;
1838
1839 /*
1840 * Cannot fail anything that doesn't have callbacks.
1841 */
1842 mutex_lock(&cpuhp_state_mutex);
1843 sp = cpuhp_get_step(fail);
1844 if (!sp->startup.single && !sp->teardown.single)
1845 ret = -EINVAL;
1846 mutex_unlock(&cpuhp_state_mutex);
1847 if (ret)
1848 return ret;
1849
1850 st->fail = fail;
1851
1852 return count;
1853}
1854
1855static ssize_t show_cpuhp_fail(struct device *dev,
1856 struct device_attribute *attr, char *buf)
1857{
1858 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1859
1860 return sprintf(buf, "%d\n", st->fail);
1861}
1862
1863static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
1864
1644static struct attribute *cpuhp_cpu_attrs[] = { 1865static struct attribute *cpuhp_cpu_attrs[] = {
1645 &dev_attr_state.attr, 1866 &dev_attr_state.attr,
1646 &dev_attr_target.attr, 1867 &dev_attr_target.attr,
1868 &dev_attr_fail.attr,
1647 NULL 1869 NULL
1648}; 1870};
1649 1871
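
The new per-cpu "fail" attribute lets a test deliberately fail one hotplug state so the rollback paths added above get exercised. A minimal userspace sketch, assuming the attribute group is exposed under /sys/devices/system/cpu/cpuN/hotplug/ (that path and the state number 50 are illustrative assumptions, not taken from this patch):

#include <stdio.h>

/* Write a string to a sysfs file; returns 0 on success. */
static int write_sysfs(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	/* Ask the hotplug core to fail state 50 on CPU 1's next transition. */
	write_sysfs("/sys/devices/system/cpu/cpu1/hotplug/fail", "50");
	/* Trigger an offline attempt; the core is expected to roll back. */
	write_sysfs("/sys/devices/system/cpu/cpu1/online", "0");
	return 0;
}
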
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index af71a84e12ee..f684d8e5fa2b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -412,6 +412,19 @@ err:
412 return NULL; 412 return NULL;
413} 413}
414 414
415static bool __always_inline rb_need_aux_wakeup(struct ring_buffer *rb)
416{
417 if (rb->aux_overwrite)
418 return false;
419
420 if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) {
421 rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark);
422 return true;
423 }
424
425 return false;
426}
427
415/* 428/*
416 * Commit the data written by hardware into the ring buffer by adjusting 429 * Commit the data written by hardware into the ring buffer by adjusting
417 * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the 430 * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
@@ -451,10 +464,8 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
451 } 464 }
452 465
453 rb->user_page->aux_head = rb->aux_head; 466 rb->user_page->aux_head = rb->aux_head;
454 if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { 467 if (rb_need_aux_wakeup(rb))
455 wakeup = true; 468 wakeup = true;
456 rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark);
457 }
458 469
459 if (wakeup) { 470 if (wakeup) {
460 if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) 471 if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
@@ -484,9 +495,8 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
484 rb->aux_head += size; 495 rb->aux_head += size;
485 496
486 rb->user_page->aux_head = rb->aux_head; 497 rb->user_page->aux_head = rb->aux_head;
487 if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { 498 if (rb_need_aux_wakeup(rb)) {
488 perf_output_wakeup(handle); 499 perf_output_wakeup(handle);
489 rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark);
490 handle->wakeup = rb->aux_wakeup + rb->aux_watermark; 500 handle->wakeup = rb->aux_wakeup + rb->aux_watermark;
491 } 501 }
492 502
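
A stand-alone sketch of the watermark arithmetic that rb_need_aux_wakeup() factors out of the two call sites: a wakeup is due once the head has advanced at least one watermark past the last wakeup point, which is then snapped down to a watermark boundary. All names below are illustrative; only the arithmetic mirrors the helper.

#include <stdbool.h>
#include <stdio.h>

#define rounddown(x, y)	((x) - ((x) % (y)))

struct aux_sketch {
	unsigned long head;		/* bytes written so far */
	unsigned long wakeup;		/* position of the last wakeup */
	unsigned long watermark;	/* wakeup granularity */
	bool overwrite;			/* overwrite mode never wakes up */
};

static bool need_aux_wakeup(struct aux_sketch *s)
{
	if (s->overwrite)
		return false;

	if (s->head - s->wakeup >= s->watermark) {
		s->wakeup = rounddown(s->head, s->watermark);
		return true;
	}

	return false;
}

int main(void)
{
	struct aux_sketch s = { .head = 0, .wakeup = 0, .watermark = 4096 };

	s.head += 5000;
	printf("%d\n", need_aux_wakeup(&s));	/* 1: crossed one watermark */
	s.head += 100;
	printf("%d\n", need_aux_wakeup(&s));	/* 0: not yet at the next one */
	return 0;
}
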
diff --git a/kernel/exit.c b/kernel/exit.c
index 3481ababd06a..f2cd53e92147 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1600,12 +1600,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
1600 struct waitid_info info = {.status = 0}; 1600 struct waitid_info info = {.status = 0};
1601 long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL); 1601 long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL);
1602 int signo = 0; 1602 int signo = 0;
1603
1603 if (err > 0) { 1604 if (err > 0) {
1604 signo = SIGCHLD; 1605 signo = SIGCHLD;
1605 err = 0; 1606 err = 0;
1606 }
1607
1608 if (!err) {
1609 if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) 1607 if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
1610 return -EFAULT; 1608 return -EFAULT;
1611 } 1609 }
@@ -1723,16 +1721,15 @@ COMPAT_SYSCALL_DEFINE5(waitid,
1723 if (err > 0) { 1721 if (err > 0) {
1724 signo = SIGCHLD; 1722 signo = SIGCHLD;
1725 err = 0; 1723 err = 0;
1726 } 1724 if (uru) {
1727 1725 /* kernel_waitid() overwrites everything in ru */
1728 if (!err && uru) { 1726 if (COMPAT_USE_64BIT_TIME)
1729 /* kernel_waitid() overwrites everything in ru */ 1727 err = copy_to_user(uru, &ru, sizeof(ru));
1730 if (COMPAT_USE_64BIT_TIME) 1728 else
1731 err = copy_to_user(uru, &ru, sizeof(ru)); 1729 err = put_compat_rusage(&ru, uru);
1732 else 1730 if (err)
1733 err = put_compat_rusage(&ru, uru); 1731 return -EFAULT;
1734 if (err) 1732 }
1735 return -EFAULT;
1736 } 1733 }
1737 1734
1738 if (!infop) 1735 if (!infop)
diff --git a/kernel/extable.c b/kernel/extable.c
index 38c2412401a1..9aa1cc41ecf7 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -102,15 +102,7 @@ int core_kernel_data(unsigned long addr)
102 102
103int __kernel_text_address(unsigned long addr) 103int __kernel_text_address(unsigned long addr)
104{ 104{
105 if (core_kernel_text(addr)) 105 if (kernel_text_address(addr))
106 return 1;
107 if (is_module_text_address(addr))
108 return 1;
109 if (is_ftrace_trampoline(addr))
110 return 1;
111 if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
112 return 1;
113 if (is_bpf_text_address(addr))
114 return 1; 106 return 1;
115 /* 107 /*
116 * There might be init symbols in saved stacktraces. 108 * There might be init symbols in saved stacktraces.
@@ -127,17 +119,42 @@ int __kernel_text_address(unsigned long addr)
127 119
128int kernel_text_address(unsigned long addr) 120int kernel_text_address(unsigned long addr)
129{ 121{
122 bool no_rcu;
123 int ret = 1;
124
130 if (core_kernel_text(addr)) 125 if (core_kernel_text(addr))
131 return 1; 126 return 1;
127
128 /*
 129	 * If a stack dump happens while RCU is not watching, then
 130	 * RCU needs to be notified that it must start watching again.
 131	 * This can happen from tracing that triggers a stack trace,
 132	 * from a WARN() issued while coming back from idle, or from a
 133	 * CPU being brought online or offline.
134 *
135 * is_module_text_address() as well as the kprobe slots
136 * and is_bpf_text_address() require RCU to be watching.
137 */
138 no_rcu = !rcu_is_watching();
139
140 /* Treat this like an NMI as it can happen anywhere */
141 if (no_rcu)
142 rcu_nmi_enter();
143
132 if (is_module_text_address(addr)) 144 if (is_module_text_address(addr))
133 return 1; 145 goto out;
134 if (is_ftrace_trampoline(addr)) 146 if (is_ftrace_trampoline(addr))
135 return 1; 147 goto out;
136 if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr)) 148 if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
137 return 1; 149 goto out;
138 if (is_bpf_text_address(addr)) 150 if (is_bpf_text_address(addr))
139 return 1; 151 goto out;
140 return 0; 152 ret = 0;
153out:
154 if (no_rcu)
155 rcu_nmi_exit();
156
157 return ret;
141} 158}
142 159
143/* 160/*
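
The hunk above wraps the RCU-dependent symbol lookups in an NMI-style RCU section whenever RCU is not already watching. A minimal kernel-side sketch of that guard pattern, with lookup_something() standing in for any RCU-protected lookup (it is a placeholder, not a real kernel function):

static int lookup_with_rcu_guard(unsigned long addr)
{
	bool no_rcu = !rcu_is_watching();
	int ret;

	/* Treat this like an NMI: it can happen anywhere, even from idle. */
	if (no_rcu)
		rcu_nmi_enter();

	ret = lookup_something(addr);	/* placeholder for an RCU-protected lookup */

	if (no_rcu)
		rcu_nmi_exit();

	return ret;
}
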
diff --git a/kernel/futex.c b/kernel/futex.c
index 3d38eaf05492..0518a0bfc746 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -821,8 +821,6 @@ static void get_pi_state(struct futex_pi_state *pi_state)
821/* 821/*
822 * Drops a reference to the pi_state object and frees or caches it 822 * Drops a reference to the pi_state object and frees or caches it
823 * when the last reference is gone. 823 * when the last reference is gone.
824 *
825 * Must be called with the hb lock held.
826 */ 824 */
827static void put_pi_state(struct futex_pi_state *pi_state) 825static void put_pi_state(struct futex_pi_state *pi_state)
828{ 826{
@@ -837,16 +835,22 @@ static void put_pi_state(struct futex_pi_state *pi_state)
837 * and has cleaned up the pi_state already 835 * and has cleaned up the pi_state already
838 */ 836 */
839 if (pi_state->owner) { 837 if (pi_state->owner) {
840 raw_spin_lock_irq(&pi_state->owner->pi_lock); 838 struct task_struct *owner;
841 list_del_init(&pi_state->list);
842 raw_spin_unlock_irq(&pi_state->owner->pi_lock);
843 839
844 rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner); 840 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
841 owner = pi_state->owner;
842 if (owner) {
843 raw_spin_lock(&owner->pi_lock);
844 list_del_init(&pi_state->list);
845 raw_spin_unlock(&owner->pi_lock);
846 }
847 rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
848 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
845 } 849 }
846 850
847 if (current->pi_state_cache) 851 if (current->pi_state_cache) {
848 kfree(pi_state); 852 kfree(pi_state);
849 else { 853 } else {
850 /* 854 /*
851 * pi_state->list is already empty. 855 * pi_state->list is already empty.
852 * clear pi_state->owner. 856 * clear pi_state->owner.
@@ -907,13 +911,14 @@ void exit_pi_state_list(struct task_struct *curr)
907 raw_spin_unlock_irq(&curr->pi_lock); 911 raw_spin_unlock_irq(&curr->pi_lock);
908 912
909 spin_lock(&hb->lock); 913 spin_lock(&hb->lock);
910 914 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
911 raw_spin_lock_irq(&curr->pi_lock); 915 raw_spin_lock(&curr->pi_lock);
912 /* 916 /*
913 * We dropped the pi-lock, so re-check whether this 917 * We dropped the pi-lock, so re-check whether this
914 * task still owns the PI-state: 918 * task still owns the PI-state:
915 */ 919 */
916 if (head->next != next) { 920 if (head->next != next) {
921 raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
917 spin_unlock(&hb->lock); 922 spin_unlock(&hb->lock);
918 continue; 923 continue;
919 } 924 }
@@ -922,9 +927,10 @@ void exit_pi_state_list(struct task_struct *curr)
922 WARN_ON(list_empty(&pi_state->list)); 927 WARN_ON(list_empty(&pi_state->list));
923 list_del_init(&pi_state->list); 928 list_del_init(&pi_state->list);
924 pi_state->owner = NULL; 929 pi_state->owner = NULL;
925 raw_spin_unlock_irq(&curr->pi_lock); 930 raw_spin_unlock(&curr->pi_lock);
926 931
927 get_pi_state(pi_state); 932 get_pi_state(pi_state);
933 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
928 spin_unlock(&hb->lock); 934 spin_unlock(&hb->lock);
929 935
930 rt_mutex_futex_unlock(&pi_state->pi_mutex); 936 rt_mutex_futex_unlock(&pi_state->pi_mutex);
@@ -1208,6 +1214,10 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
1208 1214
1209 WARN_ON(!list_empty(&pi_state->list)); 1215 WARN_ON(!list_empty(&pi_state->list));
1210 list_add(&pi_state->list, &p->pi_state_list); 1216 list_add(&pi_state->list, &p->pi_state_list);
1217 /*
1218 * Assignment without holding pi_state->pi_mutex.wait_lock is safe
1219 * because there is no concurrency as the object is not published yet.
1220 */
1211 pi_state->owner = p; 1221 pi_state->owner = p;
1212 raw_spin_unlock_irq(&p->pi_lock); 1222 raw_spin_unlock_irq(&p->pi_lock);
1213 1223
@@ -2878,6 +2888,7 @@ retry:
2878 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); 2888 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
2879 spin_unlock(&hb->lock); 2889 spin_unlock(&hb->lock);
2880 2890
2891 /* drops pi_state->pi_mutex.wait_lock */
2881 ret = wake_futex_pi(uaddr, uval, pi_state); 2892 ret = wake_futex_pi(uaddr, uval, pi_state);
2882 2893
2883 put_pi_state(pi_state); 2894 put_pi_state(pi_state);
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index f7086b78ad6e..5270a54b9fa4 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -322,7 +322,6 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
322 /* Calc pointer to the next generic chip */ 322 /* Calc pointer to the next generic chip */
323 tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); 323 tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
324 } 324 }
325 d->name = name;
326 return 0; 325 return 0;
327} 326}
328EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips); 327EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index e84b7056bb08..ac4644e92b49 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -945,7 +945,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
945 struct irq_desc *desc; 945 struct irq_desc *desc;
946 struct irq_domain *domain; 946 struct irq_domain *domain;
947 struct radix_tree_iter iter; 947 struct radix_tree_iter iter;
948 void **slot; 948 void __rcu **slot;
949 int i; 949 int i;
950 950
951 seq_printf(m, " %-16s %-6s %-10s %-10s %s\n", 951 seq_printf(m, " %-16s %-6s %-10s %-10s %s\n",
@@ -1453,7 +1453,7 @@ out_free_desc:
1453/* The irq_data was moved, fix the revmap to refer to the new location */ 1453/* The irq_data was moved, fix the revmap to refer to the new location */
1454static void irq_domain_fix_revmap(struct irq_data *d) 1454static void irq_domain_fix_revmap(struct irq_data *d)
1455{ 1455{
1456 void **slot; 1456 void __rcu **slot;
1457 1457
1458 if (d->hwirq < d->domain->revmap_size) 1458 if (d->hwirq < d->domain->revmap_size)
1459 return; /* Not using radix tree. */ 1459 return; /* Not using radix tree. */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 573dc52b0806..d00132b5c325 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1643,6 +1643,10 @@ const void *free_irq(unsigned int irq, void *dev_id)
1643#endif 1643#endif
1644 1644
1645 action = __free_irq(irq, dev_id); 1645 action = __free_irq(irq, dev_id);
1646
1647 if (!action)
1648 return NULL;
1649
1646 devname = action->name; 1650 devname = action->name;
1647 kfree(action); 1651 kfree(action);
1648 return devname; 1652 return devname;
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 02f660666ab8..1fefe6dcafd7 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -613,6 +613,33 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
613 DEFINE_WAKE_Q(wake_q); 613 DEFINE_WAKE_Q(wake_q);
614 614
615 /* 615 /*
616 * __rwsem_down_write_failed_common(sem)
617 * rwsem_optimistic_spin(sem)
618 * osq_unlock(sem->osq)
619 * ...
620 * atomic_long_add_return(&sem->count)
621 *
622 * - VS -
623 *
624 * __up_write()
625 * if (atomic_long_sub_return_release(&sem->count) < 0)
626 * rwsem_wake(sem)
627 * osq_is_locked(&sem->osq)
628 *
629 * And __up_write() must observe !osq_is_locked() when it observes the
630 * atomic_long_add_return() in order to not miss a wakeup.
631 *
632 * This boils down to:
633 *
634 * [S.rel] X = 1 [RmW] r0 = (Y += 0)
635 * MB RMB
636 * [RmW] Y += 1 [L] r1 = X
637 *
638 * exists (r0=1 /\ r1=0)
639 */
640 smp_rmb();
641
642 /*
616 * If a spinner is present, it is not necessary to do the wakeup. 643 * If a spinner is present, it is not necessary to do the wakeup.
617 * Try to do wakeup only if the trylock succeeds to minimize 644 * Try to do wakeup only if the trylock succeeds to minimize
618 * spinlock contention which may introduce too much delay in the 645 * spinlock contention which may introduce too much delay in the
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1250e4bd4b85..0c44c7b42e6d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -882,6 +882,11 @@ void rcu_irq_exit(void)
882 882
883 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!"); 883 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!");
884 rdtp = this_cpu_ptr(&rcu_dynticks); 884 rdtp = this_cpu_ptr(&rcu_dynticks);
885
886 /* Page faults can happen in NMI handlers, so check... */
887 if (READ_ONCE(rdtp->dynticks_nmi_nesting))
888 return;
889
885 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && 890 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
886 rdtp->dynticks_nesting < 1); 891 rdtp->dynticks_nesting < 1);
887 if (rdtp->dynticks_nesting <= 1) { 892 if (rdtp->dynticks_nesting <= 1) {
@@ -1015,6 +1020,11 @@ void rcu_irq_enter(void)
1015 1020
1016 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!"); 1021 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!");
1017 rdtp = this_cpu_ptr(&rcu_dynticks); 1022 rdtp = this_cpu_ptr(&rcu_dynticks);
1023
1024 /* Page faults can happen in NMI handlers, so check... */
1025 if (READ_ONCE(rdtp->dynticks_nmi_nesting))
1026 return;
1027
1018 oldval = rdtp->dynticks_nesting; 1028 oldval = rdtp->dynticks_nesting;
1019 rdtp->dynticks_nesting++; 1029 rdtp->dynticks_nesting++;
1020 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && 1030 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 18a6966567da..d17c5da523a0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5166,6 +5166,28 @@ void sched_show_task(struct task_struct *p)
5166 put_task_stack(p); 5166 put_task_stack(p);
5167} 5167}
5168 5168
5169static inline bool
5170state_filter_match(unsigned long state_filter, struct task_struct *p)
5171{
5172 /* no filter, everything matches */
5173 if (!state_filter)
5174 return true;
5175
5176 /* filter, but doesn't match */
5177 if (!(p->state & state_filter))
5178 return false;
5179
5180 /*
5181 * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows
5182 * TASK_KILLABLE).
5183 */
5184 if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE)
5185 return false;
5186
5187 return true;
5188}
5189
5190
5169void show_state_filter(unsigned long state_filter) 5191void show_state_filter(unsigned long state_filter)
5170{ 5192{
5171 struct task_struct *g, *p; 5193 struct task_struct *g, *p;
@@ -5188,7 +5210,7 @@ void show_state_filter(unsigned long state_filter)
5188 */ 5210 */
5189 touch_nmi_watchdog(); 5211 touch_nmi_watchdog();
5190 touch_all_softlockup_watchdogs(); 5212 touch_all_softlockup_watchdogs();
5191 if (!state_filter || (p->state & state_filter)) 5213 if (state_filter_match(state_filter, p))
5192 sched_show_task(p); 5214 sched_show_task(p);
5193 } 5215 }
5194 5216
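
A small self-contained illustration of the filter semantics introduced by state_filter_match(), adapted to plain values instead of a task pointer and assuming the usual 4.14 state bits where TASK_IDLE is TASK_UNINTERRUPTIBLE | TASK_NOLOAD (the constants below come from that assumption, not from this patch): a dump asking for TASK_UNINTERRUPTIBLE matches D-state tasks but skips idle kthreads.

#include <stdbool.h>
#include <stdio.h>

#define TASK_UNINTERRUPTIBLE	0x0002
#define TASK_NOLOAD		0x0400
#define TASK_IDLE		(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

static bool state_filter_match(unsigned long state_filter, unsigned long state)
{
	if (!state_filter)			/* no filter, everything matches */
		return true;
	if (!(state & state_filter))		/* filter set, but no overlap */
		return false;
	/* TASK_UNINTERRUPTIBLE should not report TASK_IDLE kthreads */
	if (state_filter == TASK_UNINTERRUPTIBLE && state == TASK_IDLE)
		return false;
	return true;
}

int main(void)
{
	printf("%d\n", state_filter_match(TASK_UNINTERRUPTIBLE, TASK_UNINTERRUPTIBLE)); /* 1 */
	printf("%d\n", state_filter_match(TASK_UNINTERRUPTIBLE, TASK_IDLE));            /* 0 */
	return 0;
}
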
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 01217fb5a5de..2f93e4a2d9f6 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -466,8 +466,6 @@ static char *task_group_path(struct task_group *tg)
466} 466}
467#endif 467#endif
468 468
469static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
470
471static void 469static void
472print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) 470print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
473{ 471{
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index c24579dfa7a1..bb3a38005b9c 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -473,14 +473,19 @@ static long seccomp_attach_filter(unsigned int flags,
473 return 0; 473 return 0;
474} 474}
475 475
476void __get_seccomp_filter(struct seccomp_filter *filter)
477{
478 /* Reference count is bounded by the number of total processes. */
479 refcount_inc(&filter->usage);
480}
481
476/* get_seccomp_filter - increments the reference count of the filter on @tsk */ 482/* get_seccomp_filter - increments the reference count of the filter on @tsk */
477void get_seccomp_filter(struct task_struct *tsk) 483void get_seccomp_filter(struct task_struct *tsk)
478{ 484{
479 struct seccomp_filter *orig = tsk->seccomp.filter; 485 struct seccomp_filter *orig = tsk->seccomp.filter;
480 if (!orig) 486 if (!orig)
481 return; 487 return;
482 /* Reference count is bounded by the number of total processes. */ 488 __get_seccomp_filter(orig);
483 refcount_inc(&orig->usage);
484} 489}
485 490
486static inline void seccomp_filter_free(struct seccomp_filter *filter) 491static inline void seccomp_filter_free(struct seccomp_filter *filter)
@@ -491,10 +496,8 @@ static inline void seccomp_filter_free(struct seccomp_filter *filter)
491 } 496 }
492} 497}
493 498
494/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ 499static void __put_seccomp_filter(struct seccomp_filter *orig)
495void put_seccomp_filter(struct task_struct *tsk)
496{ 500{
497 struct seccomp_filter *orig = tsk->seccomp.filter;
498 /* Clean up single-reference branches iteratively. */ 501 /* Clean up single-reference branches iteratively. */
499 while (orig && refcount_dec_and_test(&orig->usage)) { 502 while (orig && refcount_dec_and_test(&orig->usage)) {
500 struct seccomp_filter *freeme = orig; 503 struct seccomp_filter *freeme = orig;
@@ -503,6 +506,12 @@ void put_seccomp_filter(struct task_struct *tsk)
503 } 506 }
504} 507}
505 508
509/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
510void put_seccomp_filter(struct task_struct *tsk)
511{
512 __put_seccomp_filter(tsk->seccomp.filter);
513}
514
506static void seccomp_init_siginfo(siginfo_t *info, int syscall, int reason) 515static void seccomp_init_siginfo(siginfo_t *info, int syscall, int reason)
507{ 516{
508 memset(info, 0, sizeof(*info)); 517 memset(info, 0, sizeof(*info));
@@ -1025,13 +1034,13 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
1025 if (!data) 1034 if (!data)
1026 goto out; 1035 goto out;
1027 1036
1028 get_seccomp_filter(task); 1037 __get_seccomp_filter(filter);
1029 spin_unlock_irq(&task->sighand->siglock); 1038 spin_unlock_irq(&task->sighand->siglock);
1030 1039
1031 if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog))) 1040 if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
1032 ret = -EFAULT; 1041 ret = -EFAULT;
1033 1042
1034 put_seccomp_filter(task); 1043 __put_seccomp_filter(filter);
1035 return ret; 1044 return ret;
1036 1045
1037out: 1046out:
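
Why the __get/__put helpers matter for seccomp_get_filter(): the reference must be taken and released on the same filter object rather than re-resolved through the task, because the task can install a new filter once the siglock is dropped. A toy userspace model of that pinning rule (nothing here is kernel code; the structs and counters only illustrate the balanced get/put on one object):

#include <stdio.h>

struct filter {
	int usage;			/* toy reference count */
};

struct task {
	struct filter *filter;		/* current head, may change concurrently */
};

static void get_filter(struct filter *f) { f->usage++; }
static void put_filter(struct filter *f) { f->usage--; }

int main(void)
{
	struct filter old = { .usage = 1 }, new = { .usage = 1 };
	struct task t = { .filter = &old };
	struct filter *pinned = t.filter;	/* resolve once, under the lock */

	get_filter(pinned);			/* pin the object we will copy */
	t.filter = &new;			/* task installs a new filter meanwhile */
	/* ... long copy of 'pinned' happens here without the lock ... */
	put_filter(pinned);			/* balanced: same object as get_filter() */

	printf("old.usage=%d new.usage=%d\n", old.usage, new.usage);	/* 1 1 */
	return 0;
}
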
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6648fbbb8157..423554ad3610 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -367,7 +367,8 @@ static struct ctl_table kern_table[] = {
367 .data = &sysctl_sched_time_avg, 367 .data = &sysctl_sched_time_avg,
368 .maxlen = sizeof(unsigned int), 368 .maxlen = sizeof(unsigned int),
369 .mode = 0644, 369 .mode = 0644,
370 .proc_handler = proc_dointvec, 370 .proc_handler = proc_dointvec_minmax,
371 .extra1 = &one,
371 }, 372 },
372#ifdef CONFIG_SCHEDSTATS 373#ifdef CONFIG_SCHEDSTATS
373 { 374 {
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2a685b45b73b..45a3928544ce 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -648,6 +648,12 @@ int blk_trace_startstop(struct request_queue *q, int start)
648} 648}
649EXPORT_SYMBOL_GPL(blk_trace_startstop); 649EXPORT_SYMBOL_GPL(blk_trace_startstop);
650 650
651/*
652 * When reading or writing the blktrace sysfs files, the references to the
653 * opened sysfs or device files should prevent the underlying block device
654 * from being removed. So no further delete protection is really needed.
655 */
656
651/** 657/**
652 * blk_trace_ioctl: - handle the ioctls associated with tracing 658 * blk_trace_ioctl: - handle the ioctls associated with tracing
653 * @bdev: the block device 659 * @bdev: the block device
@@ -665,7 +671,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
665 if (!q) 671 if (!q)
666 return -ENXIO; 672 return -ENXIO;
667 673
668 mutex_lock(&bdev->bd_mutex); 674 mutex_lock(&q->blk_trace_mutex);
669 675
670 switch (cmd) { 676 switch (cmd) {
671 case BLKTRACESETUP: 677 case BLKTRACESETUP:
@@ -691,7 +697,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
691 break; 697 break;
692 } 698 }
693 699
694 mutex_unlock(&bdev->bd_mutex); 700 mutex_unlock(&q->blk_trace_mutex);
695 return ret; 701 return ret;
696} 702}
697 703
@@ -1727,7 +1733,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1727 if (q == NULL) 1733 if (q == NULL)
1728 goto out_bdput; 1734 goto out_bdput;
1729 1735
1730 mutex_lock(&bdev->bd_mutex); 1736 mutex_lock(&q->blk_trace_mutex);
1731 1737
1732 if (attr == &dev_attr_enable) { 1738 if (attr == &dev_attr_enable) {
1733 ret = sprintf(buf, "%u\n", !!q->blk_trace); 1739 ret = sprintf(buf, "%u\n", !!q->blk_trace);
@@ -1746,7 +1752,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1746 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); 1752 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
1747 1753
1748out_unlock_bdev: 1754out_unlock_bdev:
1749 mutex_unlock(&bdev->bd_mutex); 1755 mutex_unlock(&q->blk_trace_mutex);
1750out_bdput: 1756out_bdput:
1751 bdput(bdev); 1757 bdput(bdev);
1752out: 1758out:
@@ -1788,7 +1794,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1788 if (q == NULL) 1794 if (q == NULL)
1789 goto out_bdput; 1795 goto out_bdput;
1790 1796
1791 mutex_lock(&bdev->bd_mutex); 1797 mutex_lock(&q->blk_trace_mutex);
1792 1798
1793 if (attr == &dev_attr_enable) { 1799 if (attr == &dev_attr_enable) {
1794 if (value) 1800 if (value)
@@ -1814,7 +1820,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1814 } 1820 }
1815 1821
1816out_unlock_bdev: 1822out_unlock_bdev:
1817 mutex_unlock(&bdev->bd_mutex); 1823 mutex_unlock(&q->blk_trace_mutex);
1818out_bdput: 1824out_bdput:
1819 bdput(bdev); 1825 bdput(bdev);
1820out: 1826out:
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index bac629af2285..c738e764e2a5 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -656,15 +656,6 @@ int trace_print_lat_context(struct trace_iterator *iter)
656 return !trace_seq_has_overflowed(s); 656 return !trace_seq_has_overflowed(s);
657} 657}
658 658
659static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
660
661static int task_state_char(unsigned long state)
662{
663 int bit = state ? __ffs(state) + 1 : 0;
664
665 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
666}
667
668/** 659/**
669 * ftrace_find_event - find a registered event 660 * ftrace_find_event - find a registered event
670 * @type: the type of event to look for 661 * @type: the type of event to look for
@@ -930,8 +921,8 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
930 921
931 trace_assign_type(field, iter->ent); 922 trace_assign_type(field, iter->ent);
932 923
933 T = task_state_char(field->next_state); 924 T = __task_state_to_char(field->next_state);
934 S = task_state_char(field->prev_state); 925 S = __task_state_to_char(field->prev_state);
935 trace_find_cmdline(field->next_pid, comm); 926 trace_find_cmdline(field->next_pid, comm);
936 trace_seq_printf(&iter->seq, 927 trace_seq_printf(&iter->seq,
937 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", 928 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
@@ -966,8 +957,8 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
966 trace_assign_type(field, iter->ent); 957 trace_assign_type(field, iter->ent);
967 958
968 if (!S) 959 if (!S)
969 S = task_state_char(field->prev_state); 960 S = __task_state_to_char(field->prev_state);
970 T = task_state_char(field->next_state); 961 T = __task_state_to_char(field->next_state);
971 trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", 962 trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
972 field->prev_pid, 963 field->prev_pid,
973 field->prev_prio, 964 field->prev_prio,
@@ -1002,8 +993,8 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
1002 trace_assign_type(field, iter->ent); 993 trace_assign_type(field, iter->ent);
1003 994
1004 if (!S) 995 if (!S)
1005 S = task_state_char(field->prev_state); 996 S = __task_state_to_char(field->prev_state);
1006 T = task_state_char(field->next_state); 997 T = __task_state_to_char(field->next_state);
1007 998
1008 SEQ_PUT_HEX_FIELD(s, field->prev_pid); 999 SEQ_PUT_HEX_FIELD(s, field->prev_pid);
1009 SEQ_PUT_HEX_FIELD(s, field->prev_prio); 1000 SEQ_PUT_HEX_FIELD(s, field->prev_prio);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index ddec53b67646..0c331978b1a6 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -397,10 +397,10 @@ tracing_sched_switch_trace(struct trace_array *tr,
397 entry = ring_buffer_event_data(event); 397 entry = ring_buffer_event_data(event);
398 entry->prev_pid = prev->pid; 398 entry->prev_pid = prev->pid;
399 entry->prev_prio = prev->prio; 399 entry->prev_prio = prev->prio;
400 entry->prev_state = prev->state; 400 entry->prev_state = __get_task_state(prev);
401 entry->next_pid = next->pid; 401 entry->next_pid = next->pid;
402 entry->next_prio = next->prio; 402 entry->next_prio = next->prio;
403 entry->next_state = next->state; 403 entry->next_state = __get_task_state(next);
404 entry->next_cpu = task_cpu(next); 404 entry->next_cpu = task_cpu(next);
405 405
406 if (!call_filter_check_discard(call, entry, buffer, event)) 406 if (!call_filter_check_discard(call, entry, buffer, event))
@@ -425,10 +425,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
425 entry = ring_buffer_event_data(event); 425 entry = ring_buffer_event_data(event);
426 entry->prev_pid = curr->pid; 426 entry->prev_pid = curr->pid;
427 entry->prev_prio = curr->prio; 427 entry->prev_prio = curr->prio;
428 entry->prev_state = curr->state; 428 entry->prev_state = __get_task_state(curr);
429 entry->next_pid = wakee->pid; 429 entry->next_pid = wakee->pid;
430 entry->next_prio = wakee->prio; 430 entry->next_prio = wakee->prio;
431 entry->next_state = wakee->state; 431 entry->next_state = __get_task_state(wakee);
432 entry->next_cpu = task_cpu(wakee); 432 entry->next_cpu = task_cpu(wakee);
433 433
434 if (!call_filter_check_discard(call, entry, buffer, event)) 434 if (!call_filter_check_discard(call, entry, buffer, event))
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index a4df67cbc711..49cb41412eec 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -96,23 +96,9 @@ check_stack(unsigned long ip, unsigned long *stack)
96 if (in_nmi()) 96 if (in_nmi())
97 return; 97 return;
98 98
99 /*
100 * There's a slight chance that we are tracing inside the
101 * RCU infrastructure, and rcu_irq_enter() will not work
102 * as expected.
103 */
104 if (unlikely(rcu_irq_enter_disabled()))
105 return;
106
107 local_irq_save(flags); 99 local_irq_save(flags);
108 arch_spin_lock(&stack_trace_max_lock); 100 arch_spin_lock(&stack_trace_max_lock);
109 101
110 /*
111 * RCU may not be watching, make it see us.
112 * The stack trace code uses rcu_sched.
113 */
114 rcu_irq_enter();
115
116 /* In case another CPU set the tracer_frame on us */ 102 /* In case another CPU set the tracer_frame on us */
117 if (unlikely(!frame_size)) 103 if (unlikely(!frame_size))
118 this_size -= tracer_frame; 104 this_size -= tracer_frame;
@@ -205,7 +191,6 @@ check_stack(unsigned long ip, unsigned long *stack)
205 } 191 }
206 192
207 out: 193 out:
208 rcu_irq_exit();
209 arch_spin_unlock(&stack_trace_max_lock); 194 arch_spin_unlock(&stack_trace_max_lock);
210 local_irq_restore(flags); 195 local_irq_restore(flags);
211} 196}
diff --git a/mm/filemap.c b/mm/filemap.c
index 870971e20967..db250d0e0565 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2926,9 +2926,15 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
2926 * we're writing. Either one is a pretty crazy thing to do, 2926 * we're writing. Either one is a pretty crazy thing to do,
2927 * so we don't support it 100%. If this invalidation 2927 * so we don't support it 100%. If this invalidation
2928 * fails, tough, the write still worked... 2928 * fails, tough, the write still worked...
2929 *
2930 * Most of the time we do not need this since dio_complete() will do
2931	 * the invalidation for us. However, there are some file systems that
2932	 * do not end up with dio_complete() being called, so let's not break
2933	 * them by removing it completely.
2929 */ 2934 */
2930 invalidate_inode_pages2_range(mapping, 2935 if (mapping->nrpages)
2931 pos >> PAGE_SHIFT, end); 2936 invalidate_inode_pages2_range(mapping,
2937 pos >> PAGE_SHIFT, end);
2932 2938
2933 if (written > 0) { 2939 if (written > 0) {
2934 pos += written; 2940 pos += written;
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index c18115d22f00..db82a40875e8 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -126,14 +126,4 @@ config BT_DEBUGFS
126 Provide extensive information about internal Bluetooth states 126 Provide extensive information about internal Bluetooth states
127 in debugfs. 127 in debugfs.
128 128
129config BT_LEGACY_IOCTL
130 bool "Enable legacy ioctl interfaces"
131 depends on BT && BT_BREDR
132 default y
133 help
134 Enable support for legacy ioctl interfaces. This is only needed
135 for old and deprecated applications using direct ioctl calls for
136 controller management. Since Linux 3.4 all configuration and
137 setup is done via mgmt interface and this is no longer needed.
138
139source "drivers/bluetooth/Kconfig" 129source "drivers/bluetooth/Kconfig"
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 0bad296fe0af..65d734c165bd 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -878,7 +878,6 @@ static int hci_sock_release(struct socket *sock)
878 return 0; 878 return 0;
879} 879}
880 880
881#ifdef CONFIG_BT_LEGACY_IOCTL
882static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg) 881static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
883{ 882{
884 bdaddr_t bdaddr; 883 bdaddr_t bdaddr;
@@ -1050,7 +1049,6 @@ done:
1050 release_sock(sk); 1049 release_sock(sk);
1051 return err; 1050 return err;
1052} 1051}
1053#endif
1054 1052
1055static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, 1053static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
1056 int addr_len) 1054 int addr_len)
@@ -1971,11 +1969,7 @@ static const struct proto_ops hci_sock_ops = {
1971 .getname = hci_sock_getname, 1969 .getname = hci_sock_getname,
1972 .sendmsg = hci_sock_sendmsg, 1970 .sendmsg = hci_sock_sendmsg,
1973 .recvmsg = hci_sock_recvmsg, 1971 .recvmsg = hci_sock_recvmsg,
1974#ifdef CONFIG_BT_LEGACY_IOCTL
1975 .ioctl = hci_sock_ioctl, 1972 .ioctl = hci_sock_ioctl,
1976#else
1977 .ioctl = sock_no_ioctl,
1978#endif
1979 .poll = datagram_poll, 1973 .poll = datagram_poll,
1980 .listen = sock_no_listen, 1974 .listen = sock_no_listen,
1981 .shutdown = sock_no_shutdown, 1975 .shutdown = sock_no_shutdown,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 5a936a6a31a3..df062e086bdb 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -401,7 +401,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
401 if (unlikely(n != mw->mw_nents)) 401 if (unlikely(n != mw->mw_nents))
402 goto out_mapmr_err; 402 goto out_mapmr_err;
403 403
404 dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", 404 dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n",
405 __func__, frmr, mw->mw_nents, mr->length); 405 __func__, frmr, mw->mw_nents, mr->length);
406 406
407 key = (u8)(mr->rkey & 0x000000FF); 407 key = (u8)(mr->rkey & 0x000000FF);
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 2e3a10e79ca9..061d0c3a420a 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -265,6 +265,8 @@ objtool_args += --no-fp
265endif 265endif
266ifdef CONFIG_GCOV_KERNEL 266ifdef CONFIG_GCOV_KERNEL
267objtool_args += --no-unreachable 267objtool_args += --no-unreachable
268else
269objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
268endif 270endif
269 271
270# 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory 272# 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index a7a23b5541f8..91eafada3164 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -45,10 +45,8 @@ config BIG_KEYS
45 bool "Large payload keys" 45 bool "Large payload keys"
46 depends on KEYS 46 depends on KEYS
47 depends on TMPFS 47 depends on TMPFS
48 depends on (CRYPTO_ANSI_CPRNG = y || CRYPTO_DRBG = y)
49 select CRYPTO_AES 48 select CRYPTO_AES
50 select CRYPTO_ECB 49 select CRYPTO_GCM
51 select CRYPTO_RNG
52 help 50 help
53 This option provides support for holding large keys within the kernel 51 This option provides support for holding large keys within the kernel
54 (for example Kerberos ticket caches). The data may be stored out to 52 (for example Kerberos ticket caches). The data may be stored out to
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index 6acb00f6f22c..e607830b6154 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -1,5 +1,6 @@
1/* Large capacity key type 1/* Large capacity key type
2 * 2 *
3 * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
3 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. 4 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 5 * Written by David Howells (dhowells@redhat.com)
5 * 6 *
@@ -16,10 +17,10 @@
16#include <linux/shmem_fs.h> 17#include <linux/shmem_fs.h>
17#include <linux/err.h> 18#include <linux/err.h>
18#include <linux/scatterlist.h> 19#include <linux/scatterlist.h>
20#include <linux/random.h>
19#include <keys/user-type.h> 21#include <keys/user-type.h>
20#include <keys/big_key-type.h> 22#include <keys/big_key-type.h>
21#include <crypto/rng.h> 23#include <crypto/aead.h>
22#include <crypto/skcipher.h>
23 24
24/* 25/*
25 * Layout of key payload words. 26 * Layout of key payload words.
@@ -49,7 +50,12 @@ enum big_key_op {
49/* 50/*
50 * Key size for big_key data encryption 51 * Key size for big_key data encryption
51 */ 52 */
52#define ENC_KEY_SIZE 16 53#define ENC_KEY_SIZE 32
54
55/*
56 * Authentication tag length
57 */
58#define ENC_AUTHTAG_SIZE 16
53 59
54/* 60/*
55 * big_key defined keys take an arbitrary string as the description and an 61 * big_key defined keys take an arbitrary string as the description and an
@@ -64,57 +70,62 @@ struct key_type key_type_big_key = {
64 .destroy = big_key_destroy, 70 .destroy = big_key_destroy,
65 .describe = big_key_describe, 71 .describe = big_key_describe,
66 .read = big_key_read, 72 .read = big_key_read,
73 /* no ->update(); don't add it without changing big_key_crypt() nonce */
67}; 74};
68 75
69/* 76/*
70 * Crypto names for big_key data encryption 77 * Crypto names for big_key data authenticated encryption
71 */ 78 */
72static const char big_key_rng_name[] = "stdrng"; 79static const char big_key_alg_name[] = "gcm(aes)";
73static const char big_key_alg_name[] = "ecb(aes)";
74 80
75/* 81/*
76 * Crypto algorithms for big_key data encryption 82 * Crypto algorithms for big_key data authenticated encryption
77 */ 83 */
78static struct crypto_rng *big_key_rng; 84static struct crypto_aead *big_key_aead;
79static struct crypto_skcipher *big_key_skcipher;
80 85
81/* 86/*
82 * Generate random key to encrypt big_key data 87 * Since changing the key affects the entire object, we need a mutex.
83 */ 88 */
84static inline int big_key_gen_enckey(u8 *key) 89static DEFINE_MUTEX(big_key_aead_lock);
85{
86 return crypto_rng_get_bytes(big_key_rng, key, ENC_KEY_SIZE);
87}
88 90
89/* 91/*
90 * Encrypt/decrypt big_key data 92 * Encrypt/decrypt big_key data
91 */ 93 */
92static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) 94static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key)
93{ 95{
94 int ret = -EINVAL; 96 int ret;
95 struct scatterlist sgio; 97 struct scatterlist sgio;
96 SKCIPHER_REQUEST_ON_STACK(req, big_key_skcipher); 98 struct aead_request *aead_req;
97 99 /* We always use a zero nonce. The reason we can get away with this is
98 if (crypto_skcipher_setkey(big_key_skcipher, key, ENC_KEY_SIZE)) { 100 * because we're using a different randomly generated key for every
101 * different encryption. Notably, too, key_type_big_key doesn't define
102 * an .update function, so there's no chance we'll wind up reusing the
103 * key to encrypt updated data. Simply put: one key, one encryption.
104 */
105 u8 zero_nonce[crypto_aead_ivsize(big_key_aead)];
106
107 aead_req = aead_request_alloc(big_key_aead, GFP_KERNEL);
108 if (!aead_req)
109 return -ENOMEM;
110
111 memset(zero_nonce, 0, sizeof(zero_nonce));
112 sg_init_one(&sgio, data, datalen + (op == BIG_KEY_ENC ? ENC_AUTHTAG_SIZE : 0));
113 aead_request_set_crypt(aead_req, &sgio, &sgio, datalen, zero_nonce);
114 aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
115 aead_request_set_ad(aead_req, 0);
116
117 mutex_lock(&big_key_aead_lock);
118 if (crypto_aead_setkey(big_key_aead, key, ENC_KEY_SIZE)) {
99 ret = -EAGAIN; 119 ret = -EAGAIN;
100 goto error; 120 goto error;
101 } 121 }
102
103 skcipher_request_set_tfm(req, big_key_skcipher);
104 skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
105 NULL, NULL);
106
107 sg_init_one(&sgio, data, datalen);
108 skcipher_request_set_crypt(req, &sgio, &sgio, datalen, NULL);
109
110 if (op == BIG_KEY_ENC) 122 if (op == BIG_KEY_ENC)
111 ret = crypto_skcipher_encrypt(req); 123 ret = crypto_aead_encrypt(aead_req);
112 else 124 else
113 ret = crypto_skcipher_decrypt(req); 125 ret = crypto_aead_decrypt(aead_req);
114
115 skcipher_request_zero(req);
116
117error: 126error:
127 mutex_unlock(&big_key_aead_lock);
128 aead_request_free(aead_req);
118 return ret; 129 return ret;
119} 130}
120 131
@@ -146,16 +157,13 @@ int big_key_preparse(struct key_preparsed_payload *prep)
146 * 157 *
147 * File content is stored encrypted with randomly generated key. 158 * File content is stored encrypted with randomly generated key.
148 */ 159 */
149 size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher)); 160 size_t enclen = datalen + ENC_AUTHTAG_SIZE;
150 loff_t pos = 0; 161 loff_t pos = 0;
151 162
152 /* prepare aligned data to encrypt */
153 data = kmalloc(enclen, GFP_KERNEL); 163 data = kmalloc(enclen, GFP_KERNEL);
154 if (!data) 164 if (!data)
155 return -ENOMEM; 165 return -ENOMEM;
156
157 memcpy(data, prep->data, datalen); 166 memcpy(data, prep->data, datalen);
158 memset(data + datalen, 0x00, enclen - datalen);
159 167
160 /* generate random key */ 168 /* generate random key */
161 enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL); 169 enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL);
@@ -163,13 +171,12 @@ int big_key_preparse(struct key_preparsed_payload *prep)
163 ret = -ENOMEM; 171 ret = -ENOMEM;
164 goto error; 172 goto error;
165 } 173 }
166 174 ret = get_random_bytes_wait(enckey, ENC_KEY_SIZE);
167 ret = big_key_gen_enckey(enckey); 175 if (unlikely(ret))
168 if (ret)
169 goto err_enckey; 176 goto err_enckey;
170 177
171 /* encrypt aligned data */ 178 /* encrypt aligned data */
172 ret = big_key_crypt(BIG_KEY_ENC, data, enclen, enckey); 179 ret = big_key_crypt(BIG_KEY_ENC, data, datalen, enckey);
173 if (ret) 180 if (ret)
174 goto err_enckey; 181 goto err_enckey;
175 182
@@ -195,7 +202,7 @@ int big_key_preparse(struct key_preparsed_payload *prep)
195 *path = file->f_path; 202 *path = file->f_path;
196 path_get(path); 203 path_get(path);
197 fput(file); 204 fput(file);
198 kfree(data); 205 kzfree(data);
199 } else { 206 } else {
200 /* Just store the data in a buffer */ 207 /* Just store the data in a buffer */
201 void *data = kmalloc(datalen, GFP_KERNEL); 208 void *data = kmalloc(datalen, GFP_KERNEL);
@@ -211,9 +218,9 @@ int big_key_preparse(struct key_preparsed_payload *prep)
211err_fput: 218err_fput:
212 fput(file); 219 fput(file);
213err_enckey: 220err_enckey:
214 kfree(enckey); 221 kzfree(enckey);
215error: 222error:
216 kfree(data); 223 kzfree(data);
217 return ret; 224 return ret;
218} 225}
219 226
@@ -227,7 +234,7 @@ void big_key_free_preparse(struct key_preparsed_payload *prep)
227 234
228 path_put(path); 235 path_put(path);
229 } 236 }
230 kfree(prep->payload.data[big_key_data]); 237 kzfree(prep->payload.data[big_key_data]);
231} 238}
232 239
233/* 240/*
@@ -259,7 +266,7 @@ void big_key_destroy(struct key *key)
259 path->mnt = NULL; 266 path->mnt = NULL;
260 path->dentry = NULL; 267 path->dentry = NULL;
261 } 268 }
262 kfree(key->payload.data[big_key_data]); 269 kzfree(key->payload.data[big_key_data]);
263 key->payload.data[big_key_data] = NULL; 270 key->payload.data[big_key_data] = NULL;
264} 271}
265 272
@@ -295,7 +302,7 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
295 struct file *file; 302 struct file *file;
296 u8 *data; 303 u8 *data;
297 u8 *enckey = (u8 *)key->payload.data[big_key_data]; 304 u8 *enckey = (u8 *)key->payload.data[big_key_data];
298 size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher)); 305 size_t enclen = datalen + ENC_AUTHTAG_SIZE;
299 loff_t pos = 0; 306 loff_t pos = 0;
300 307
301 data = kmalloc(enclen, GFP_KERNEL); 308 data = kmalloc(enclen, GFP_KERNEL);
@@ -328,7 +335,7 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
328err_fput: 335err_fput:
329 fput(file); 336 fput(file);
330error: 337error:
331 kfree(data); 338 kzfree(data);
332 } else { 339 } else {
333 ret = datalen; 340 ret = datalen;
334 if (copy_to_user(buffer, key->payload.data[big_key_data], 341 if (copy_to_user(buffer, key->payload.data[big_key_data],
@@ -344,47 +351,31 @@ error:
344 */ 351 */
345static int __init big_key_init(void) 352static int __init big_key_init(void)
346{ 353{
347 struct crypto_skcipher *cipher;
348 struct crypto_rng *rng;
349 int ret; 354 int ret;
350 355
351 rng = crypto_alloc_rng(big_key_rng_name, 0, 0);
352 if (IS_ERR(rng)) {
353 pr_err("Can't alloc rng: %ld\n", PTR_ERR(rng));
354 return PTR_ERR(rng);
355 }
356
357 big_key_rng = rng;
358
359 /* seed RNG */
360 ret = crypto_rng_reset(rng, NULL, crypto_rng_seedsize(rng));
361 if (ret) {
362 pr_err("Can't reset rng: %d\n", ret);
363 goto error_rng;
364 }
365
366 /* init block cipher */ 356 /* init block cipher */
367 cipher = crypto_alloc_skcipher(big_key_alg_name, 0, CRYPTO_ALG_ASYNC); 357 big_key_aead = crypto_alloc_aead(big_key_alg_name, 0, CRYPTO_ALG_ASYNC);
368 if (IS_ERR(cipher)) { 358 if (IS_ERR(big_key_aead)) {
369 ret = PTR_ERR(cipher); 359 ret = PTR_ERR(big_key_aead);
370 pr_err("Can't alloc crypto: %d\n", ret); 360 pr_err("Can't alloc crypto: %d\n", ret);
371 goto error_rng; 361 return ret;
362 }
363 ret = crypto_aead_setauthsize(big_key_aead, ENC_AUTHTAG_SIZE);
364 if (ret < 0) {
365 pr_err("Can't set crypto auth tag len: %d\n", ret);
366 goto free_aead;
372 } 367 }
373
374 big_key_skcipher = cipher;
375 368
376 ret = register_key_type(&key_type_big_key); 369 ret = register_key_type(&key_type_big_key);
377 if (ret < 0) { 370 if (ret < 0) {
378 pr_err("Can't register type: %d\n", ret); 371 pr_err("Can't register type: %d\n", ret);
379 goto error_cipher; 372 goto free_aead;
380 } 373 }
381 374
382 return 0; 375 return 0;
383 376
384error_cipher: 377free_aead:
385 crypto_free_skcipher(big_key_skcipher); 378 crypto_free_aead(big_key_aead);
386error_rng:
387 crypto_free_rng(big_key_rng);
388 return ret; 379 return ret;
389} 380}
390 381
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 1c02c6547038..503adbae7b0d 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -141,7 +141,7 @@ extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
141extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx); 141extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx);
142extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx); 142extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx);
143 143
144extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); 144extern struct key *find_keyring_by_name(const char *name, bool uid_keyring);
145 145
146extern int install_user_keyrings(void); 146extern int install_user_keyrings(void);
147extern int install_thread_keyring_to_cred(struct cred *); 147extern int install_thread_keyring_to_cred(struct cred *);
diff --git a/security/keys/key.c b/security/keys/key.c
index 83da68d98b40..eb914a838840 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -54,10 +54,10 @@ void __key_check(const struct key *key)
54struct key_user *key_user_lookup(kuid_t uid) 54struct key_user *key_user_lookup(kuid_t uid)
55{ 55{
56 struct key_user *candidate = NULL, *user; 56 struct key_user *candidate = NULL, *user;
57 struct rb_node *parent = NULL; 57 struct rb_node *parent, **p;
58 struct rb_node **p;
59 58
60try_again: 59try_again:
60 parent = NULL;
61 p = &key_user_tree.rb_node; 61 p = &key_user_tree.rb_node;
62 spin_lock(&key_user_lock); 62 spin_lock(&key_user_lock);
63 63
@@ -302,6 +302,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
302 key->flags |= 1 << KEY_FLAG_IN_QUOTA; 302 key->flags |= 1 << KEY_FLAG_IN_QUOTA;
303 if (flags & KEY_ALLOC_BUILT_IN) 303 if (flags & KEY_ALLOC_BUILT_IN)
304 key->flags |= 1 << KEY_FLAG_BUILTIN; 304 key->flags |= 1 << KEY_FLAG_BUILTIN;
305 if (flags & KEY_ALLOC_UID_KEYRING)
306 key->flags |= 1 << KEY_FLAG_UID_KEYRING;
305 307
306#ifdef KEY_DEBUGGING 308#ifdef KEY_DEBUGGING
307 key->magic = KEY_DEBUG_MAGIC; 309 key->magic = KEY_DEBUG_MAGIC;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index ab0b337c84b4..365ff85d7e27 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -766,12 +766,17 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen)
766 766
767 key = key_ref_to_ptr(key_ref); 767 key = key_ref_to_ptr(key_ref);
768 768
769 if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) {
770 ret = -ENOKEY;
771 goto error2;
772 }
773
769 /* see if we can read it directly */ 774 /* see if we can read it directly */
770 ret = key_permission(key_ref, KEY_NEED_READ); 775 ret = key_permission(key_ref, KEY_NEED_READ);
771 if (ret == 0) 776 if (ret == 0)
772 goto can_read_key; 777 goto can_read_key;
773 if (ret != -EACCES) 778 if (ret != -EACCES)
774 goto error; 779 goto error2;
775 780
776 /* we can't; see if it's searchable from this process's keyrings 781 /* we can't; see if it's searchable from this process's keyrings
777 * - we automatically take account of the fact that it may be 782 * - we automatically take account of the fact that it may be
@@ -1406,11 +1411,9 @@ long keyctl_assume_authority(key_serial_t id)
1406 } 1411 }
1407 1412
1408 ret = keyctl_change_reqkey_auth(authkey); 1413 ret = keyctl_change_reqkey_auth(authkey);
1409 if (ret < 0) 1414 if (ret == 0)
1410 goto error; 1415 ret = authkey->serial;
1411 key_put(authkey); 1416 key_put(authkey);
1412
1413 ret = authkey->serial;
1414error: 1417error:
1415 return ret; 1418 return ret;
1416} 1419}
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index de81793f9920..4fa82a8a9c0e 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -423,7 +423,7 @@ static void keyring_describe(const struct key *keyring, struct seq_file *m)
423} 423}
424 424
425struct keyring_read_iterator_context { 425struct keyring_read_iterator_context {
426 size_t qty; 426 size_t buflen;
427 size_t count; 427 size_t count;
428 key_serial_t __user *buffer; 428 key_serial_t __user *buffer;
429}; 429};
@@ -435,9 +435,9 @@ static int keyring_read_iterator(const void *object, void *data)
435 int ret; 435 int ret;
436 436
437 kenter("{%s,%d},,{%zu/%zu}", 437 kenter("{%s,%d},,{%zu/%zu}",
438 key->type->name, key->serial, ctx->count, ctx->qty); 438 key->type->name, key->serial, ctx->count, ctx->buflen);
439 439
440 if (ctx->count >= ctx->qty) 440 if (ctx->count >= ctx->buflen)
441 return 1; 441 return 1;
442 442
443 ret = put_user(key->serial, ctx->buffer); 443 ret = put_user(key->serial, ctx->buffer);
@@ -472,16 +472,12 @@ static long keyring_read(const struct key *keyring,
472 return 0; 472 return 0;
473 473
474 /* Calculate how much data we could return */ 474 /* Calculate how much data we could return */
475 ctx.qty = nr_keys * sizeof(key_serial_t);
476
477 if (!buffer || !buflen) 475 if (!buffer || !buflen)
478 return ctx.qty; 476 return nr_keys * sizeof(key_serial_t);
479
480 if (buflen > ctx.qty)
481 ctx.qty = buflen;
482 477
483 /* Copy the IDs of the subscribed keys into the buffer */ 478 /* Copy the IDs of the subscribed keys into the buffer */
484 ctx.buffer = (key_serial_t __user *)buffer; 479 ctx.buffer = (key_serial_t __user *)buffer;
480 ctx.buflen = buflen;
485 ctx.count = 0; 481 ctx.count = 0;
486 ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx); 482 ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx);
487 if (ret < 0) { 483 if (ret < 0) {
@@ -1101,15 +1097,15 @@ found:
1101/* 1097/*
1102 * Find a keyring with the specified name. 1098 * Find a keyring with the specified name.
1103 * 1099 *
1104 * All named keyrings in the current user namespace are searched, provided they 1100 * Only keyrings that have nonzero refcount, are not revoked, and are owned by a
1105 * grant Search permission directly to the caller (unless this check is 1101 * user in the current user namespace are considered. If @uid_keyring is %true,
1106 * skipped). Keyrings whose usage points have reached zero or who have been 1102 * the keyring additionally must have been allocated as a user or user session
1107 * revoked are skipped. 1103 * keyring; otherwise, it must grant Search permission directly to the caller.
1108 * 1104 *
1109 * Returns a pointer to the keyring with the keyring's refcount having being 1105 * Returns a pointer to the keyring with the keyring's refcount having being
1110 * incremented on success. -ENOKEY is returned if a key could not be found. 1106 * incremented on success. -ENOKEY is returned if a key could not be found.
1111 */ 1107 */
1112struct key *find_keyring_by_name(const char *name, bool skip_perm_check) 1108struct key *find_keyring_by_name(const char *name, bool uid_keyring)
1113{ 1109{
1114 struct key *keyring; 1110 struct key *keyring;
1115 int bucket; 1111 int bucket;
@@ -1137,10 +1133,15 @@ struct key *find_keyring_by_name(const char *name, bool skip_perm_check)
1137 if (strcmp(keyring->description, name) != 0) 1133 if (strcmp(keyring->description, name) != 0)
1138 continue; 1134 continue;
1139 1135
1140 if (!skip_perm_check && 1136 if (uid_keyring) {
1141 key_permission(make_key_ref(keyring, 0), 1137 if (!test_bit(KEY_FLAG_UID_KEYRING,
1142 KEY_NEED_SEARCH) < 0) 1138 &keyring->flags))
1143 continue; 1139 continue;
1140 } else {
1141 if (key_permission(make_key_ref(keyring, 0),
1142 KEY_NEED_SEARCH) < 0)
1143 continue;
1144 }
1144 1145
1145 /* we've got a match but we might end up racing with 1146 /* we've got a match but we might end up racing with
1146 * key_cleanup() if the keyring is currently 'dead' 1147 * key_cleanup() if the keyring is currently 'dead'
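The keyring.c rework above stores the caller's buffer length in the iterator context directly, so keyring_read() copies at most buflen bytes of key serials, and find_keyring_by_name() gains the uid_keyring mode used by the process_keys.c change further down. A short sketch of the userspace-visible behaviour, assuming libkeyutils: keyctl_read() on a keyring fills the buffer with key_serial_t values and reports the size of the complete list, so a short buffer is only partially filled rather than overrun.

#include <stdio.h>
#include <keyutils.h>

int main(void)
{
	key_serial_t ids[16];
	long n = keyctl_read(KEY_SPEC_SESSION_KEYRING, (char *)ids, sizeof(ids));

	if (n < 0) {
		perror("keyctl_read");
		return 1;
	}
	for (size_t i = 0; i < (size_t)n / sizeof(ids[0]) && i < 16; i++)
		printf("member: %d\n", ids[i]);
	printf("full list is %ld bytes\n", n);
	return 0;
}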
diff --git a/security/keys/proc.c b/security/keys/proc.c
index bf08d02b6646..de834309d100 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -187,7 +187,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
187 struct keyring_search_context ctx = { 187 struct keyring_search_context ctx = {
188 .index_key.type = key->type, 188 .index_key.type = key->type,
189 .index_key.description = key->description, 189 .index_key.description = key->description,
190 .cred = current_cred(), 190 .cred = m->file->f_cred,
191 .match_data.cmp = lookup_user_key_possessed, 191 .match_data.cmp = lookup_user_key_possessed,
192 .match_data.raw_data = key, 192 .match_data.raw_data = key,
193 .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, 193 .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
@@ -207,11 +207,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
207 } 207 }
208 } 208 }
209 209
210 /* check whether the current task is allowed to view the key (assuming 210 /* check whether the current task is allowed to view the key */
211 * non-possession)
212 * - the caller holds a spinlock, and thus the RCU read lock, making our
213 * access to __current_cred() safe
214 */
215 rc = key_task_permission(key_ref, ctx.cred, KEY_NEED_VIEW); 211 rc = key_task_permission(key_ref, ctx.cred, KEY_NEED_VIEW);
216 if (rc < 0) 212 if (rc < 0)
217 return 0; 213 return 0;
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 86bced9fdbdf..293d3598153b 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -77,7 +77,8 @@ int install_user_keyrings(void)
77 if (IS_ERR(uid_keyring)) { 77 if (IS_ERR(uid_keyring)) {
78 uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID, 78 uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID,
79 cred, user_keyring_perm, 79 cred, user_keyring_perm,
80 KEY_ALLOC_IN_QUOTA, 80 KEY_ALLOC_UID_KEYRING |
81 KEY_ALLOC_IN_QUOTA,
81 NULL, NULL); 82 NULL, NULL);
82 if (IS_ERR(uid_keyring)) { 83 if (IS_ERR(uid_keyring)) {
83 ret = PTR_ERR(uid_keyring); 84 ret = PTR_ERR(uid_keyring);
@@ -94,7 +95,8 @@ int install_user_keyrings(void)
94 session_keyring = 95 session_keyring =
95 keyring_alloc(buf, user->uid, INVALID_GID, 96 keyring_alloc(buf, user->uid, INVALID_GID,
96 cred, user_keyring_perm, 97 cred, user_keyring_perm,
97 KEY_ALLOC_IN_QUOTA, 98 KEY_ALLOC_UID_KEYRING |
99 KEY_ALLOC_IN_QUOTA,
98 NULL, NULL); 100 NULL, NULL);
99 if (IS_ERR(session_keyring)) { 101 if (IS_ERR(session_keyring)) {
100 ret = PTR_ERR(session_keyring); 102 ret = PTR_ERR(session_keyring);
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index afe9d22ab361..6ebf1af8fce9 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -120,6 +120,18 @@ static void request_key_auth_revoke(struct key *key)
120 } 120 }
121} 121}
122 122
123static void free_request_key_auth(struct request_key_auth *rka)
124{
125 if (!rka)
126 return;
127 key_put(rka->target_key);
128 key_put(rka->dest_keyring);
129 if (rka->cred)
130 put_cred(rka->cred);
131 kfree(rka->callout_info);
132 kfree(rka);
133}
134
123/* 135/*
124 * Destroy an instantiation authorisation token key. 136 * Destroy an instantiation authorisation token key.
125 */ 137 */
@@ -129,15 +141,7 @@ static void request_key_auth_destroy(struct key *key)
129 141
130 kenter("{%d}", key->serial); 142 kenter("{%d}", key->serial);
131 143
132 if (rka->cred) { 144 free_request_key_auth(rka);
133 put_cred(rka->cred);
134 rka->cred = NULL;
135 }
136
137 key_put(rka->target_key);
138 key_put(rka->dest_keyring);
139 kfree(rka->callout_info);
140 kfree(rka);
141} 145}
142 146
143/* 147/*
@@ -151,22 +155,18 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
151 const struct cred *cred = current->cred; 155 const struct cred *cred = current->cred;
152 struct key *authkey = NULL; 156 struct key *authkey = NULL;
153 char desc[20]; 157 char desc[20];
154 int ret; 158 int ret = -ENOMEM;
155 159
156 kenter("%d,", target->serial); 160 kenter("%d,", target->serial);
157 161
158 /* allocate a auth record */ 162 /* allocate a auth record */
159 rka = kmalloc(sizeof(*rka), GFP_KERNEL); 163 rka = kzalloc(sizeof(*rka), GFP_KERNEL);
160 if (!rka) { 164 if (!rka)
161 kleave(" = -ENOMEM"); 165 goto error;
162 return ERR_PTR(-ENOMEM); 166 rka->callout_info = kmemdup(callout_info, callout_len, GFP_KERNEL);
163 } 167 if (!rka->callout_info)
164 rka->callout_info = kmalloc(callout_len, GFP_KERNEL); 168 goto error_free_rka;
165 if (!rka->callout_info) { 169 rka->callout_len = callout_len;
166 kleave(" = -ENOMEM");
167 kfree(rka);
168 return ERR_PTR(-ENOMEM);
169 }
170 170
171 /* see if the calling process is already servicing the key request of 171 /* see if the calling process is already servicing the key request of
172 * another process */ 172 * another process */
@@ -176,8 +176,12 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
176 176
177 /* if the auth key has been revoked, then the key we're 177 /* if the auth key has been revoked, then the key we're
178 * servicing is already instantiated */ 178 * servicing is already instantiated */
179 if (test_bit(KEY_FLAG_REVOKED, &cred->request_key_auth->flags)) 179 if (test_bit(KEY_FLAG_REVOKED,
180 goto auth_key_revoked; 180 &cred->request_key_auth->flags)) {
181 up_read(&cred->request_key_auth->sem);
182 ret = -EKEYREVOKED;
183 goto error_free_rka;
184 }
181 185
182 irka = cred->request_key_auth->payload.data[0]; 186 irka = cred->request_key_auth->payload.data[0];
183 rka->cred = get_cred(irka->cred); 187 rka->cred = get_cred(irka->cred);
@@ -193,8 +197,6 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
193 197
194 rka->target_key = key_get(target); 198 rka->target_key = key_get(target);
195 rka->dest_keyring = key_get(dest_keyring); 199 rka->dest_keyring = key_get(dest_keyring);
196 memcpy(rka->callout_info, callout_info, callout_len);
197 rka->callout_len = callout_len;
198 200
199 /* allocate the auth key */ 201 /* allocate the auth key */
200 sprintf(desc, "%x", target->serial); 202 sprintf(desc, "%x", target->serial);
@@ -205,32 +207,22 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
205 KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA, NULL); 207 KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA, NULL);
206 if (IS_ERR(authkey)) { 208 if (IS_ERR(authkey)) {
207 ret = PTR_ERR(authkey); 209 ret = PTR_ERR(authkey);
208 goto error_alloc; 210 goto error_free_rka;
209 } 211 }
210 212
211 /* construct the auth key */ 213 /* construct the auth key */
212 ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL); 214 ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL);
213 if (ret < 0) 215 if (ret < 0)
214 goto error_inst; 216 goto error_put_authkey;
215 217
216 kleave(" = {%d,%d}", authkey->serial, refcount_read(&authkey->usage)); 218 kleave(" = {%d,%d}", authkey->serial, refcount_read(&authkey->usage));
217 return authkey; 219 return authkey;
218 220
219auth_key_revoked: 221error_put_authkey:
220 up_read(&cred->request_key_auth->sem);
221 kfree(rka->callout_info);
222 kfree(rka);
223 kleave("= -EKEYREVOKED");
224 return ERR_PTR(-EKEYREVOKED);
225
226error_inst:
227 key_revoke(authkey);
228 key_put(authkey); 222 key_put(authkey);
229error_alloc: 223error_free_rka:
230 key_put(rka->target_key); 224 free_request_key_auth(rka);
231 key_put(rka->dest_keyring); 225error:
232 kfree(rka->callout_info);
233 kfree(rka);
234 kleave("= %d", ret); 226 kleave("= %d", ret);
235 return ERR_PTR(ret); 227 return ERR_PTR(ret);
236} 228}
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 69d09c39bbcd..cd7359e23d86 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req {
88/* kvm attributes for KVM_S390_VM_TOD */ 88/* kvm attributes for KVM_S390_VM_TOD */
89#define KVM_S390_VM_TOD_LOW 0 89#define KVM_S390_VM_TOD_LOW 0
90#define KVM_S390_VM_TOD_HIGH 1 90#define KVM_S390_VM_TOD_HIGH 1
91#define KVM_S390_VM_TOD_EXT 2
92
93struct kvm_s390_vm_tod_clock {
94 __u8 epoch_idx;
95 __u64 tod;
96};
91 97
92/* kvm attributes for KVM_S390_VM_CPU_MODEL */ 98/* kvm attributes for KVM_S390_VM_CPU_MODEL */
93/* processor related attributes are r/w */ 99/* processor related attributes are r/w */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 8ea315a11fe0..2519c6c801c9 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -196,6 +196,7 @@
196 196
197#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ 197#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
198#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ 198#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
199#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
199 200
200#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ 201#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
201#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ 202#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
@@ -287,6 +288,7 @@
287#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ 288#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
288#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ 289#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
289#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ 290#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
291#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
290 292
291/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ 293/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
292#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ 294#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 5dff775af7cd..c10c9128f54e 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -21,11 +21,13 @@
21# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) 21# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
22# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) 22# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
23# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) 23# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
24# define DISABLE_PCID 0
24#else 25#else
25# define DISABLE_VME 0 26# define DISABLE_VME 0
26# define DISABLE_K6_MTRR 0 27# define DISABLE_K6_MTRR 0
27# define DISABLE_CYRIX_ARR 0 28# define DISABLE_CYRIX_ARR 0
28# define DISABLE_CENTAUR_MCR 0 29# define DISABLE_CENTAUR_MCR 0
30# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
29#endif /* CONFIG_X86_64 */ 31#endif /* CONFIG_X86_64 */
30 32
31#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS 33#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
@@ -49,7 +51,7 @@
49#define DISABLED_MASK1 0 51#define DISABLED_MASK1 0
50#define DISABLED_MASK2 0 52#define DISABLED_MASK2 0
51#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) 53#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
52#define DISABLED_MASK4 0 54#define DISABLED_MASK4 (DISABLE_PCID)
53#define DISABLED_MASK5 0 55#define DISABLED_MASK5 0
54#define DISABLED_MASK6 0 56#define DISABLED_MASK6 0
55#define DISABLED_MASK7 0 57#define DISABLED_MASK7 0
diff --git a/tools/include/asm-generic/hugetlb_encode.h b/tools/include/asm-generic/hugetlb_encode.h
new file mode 100644
index 000000000000..e4732d3c2998
--- /dev/null
+++ b/tools/include/asm-generic/hugetlb_encode.h
@@ -0,0 +1,34 @@
1#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_
2#define _ASM_GENERIC_HUGETLB_ENCODE_H_
3
4/*
5 * Several system calls take a flag to request "hugetlb" huge pages.
6 * Without further specification, these system calls will use the
7 * system's default huge page size. If a system supports multiple
8 * huge page sizes, the desired huge page size can be specified in
9 * bits [26:31] of the flag arguments. The value in these 6 bits
10 * will encode the log2 of the huge page size.
11 *
12 * The following definitions are associated with this huge page size
13 * encoding in flag arguments. System call specific header files
14 * that use this encoding should include this file. They can then
15 * provide definitions based on these with their own specific prefix.
16 * for example:
17 * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
18 */
19
20#define HUGETLB_FLAG_ENCODE_SHIFT 26
21#define HUGETLB_FLAG_ENCODE_MASK 0x3f
22
23#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT)
24#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT)
25#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT)
26#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT)
27#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT)
28#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT)
29#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT)
30#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT)
31#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT)
32#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT)
33
34#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
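The new header encodes the huge page size as its log2 in bits [26:31] of the flags word, so for example 2 MB (2^21 bytes) becomes 21 << 26. A small self-check, assuming the uapi header is installed as <asm-generic/hugetlb_encode.h> (otherwise the two helper defines can simply be copied inline):

#include <assert.h>
#include <asm-generic/hugetlb_encode.h>

int main(void)
{
	unsigned long flag = HUGETLB_FLAG_ENCODE_2MB;
	unsigned int shift = (flag >> HUGETLB_FLAG_ENCODE_SHIFT) &
			     HUGETLB_FLAG_ENCODE_MASK;

	assert(shift == 21);				/* log2(2 MB) */
	assert((1UL << shift) == 2UL * 1024 * 1024);	/* 2 MB */
	return 0;
}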
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index 8c27db0c5c08..203268f9231e 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -58,20 +58,12 @@
58 overrides the coredump filter bits */ 58 overrides the coredump filter bits */
59#define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */ 59#define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */
60 60
61#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */
62#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
63
61/* compatibility flags */ 64/* compatibility flags */
62#define MAP_FILE 0 65#define MAP_FILE 0
63 66
64/*
65 * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
66 * This gives us 6 bits, which is enough until someone invents 128 bit address
67 * spaces.
68 *
69 * Assume these are all power of twos.
70 * When 0 use the default page size.
71 */
72#define MAP_HUGE_SHIFT 26
73#define MAP_HUGE_MASK 0x3f
74
75#define PKEY_DISABLE_ACCESS 0x1 67#define PKEY_DISABLE_ACCESS 0x1
76#define PKEY_DISABLE_WRITE 0x2 68#define PKEY_DISABLE_WRITE 0x2
77#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ 69#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 101593ab10ac..97677cd6964d 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -700,6 +700,7 @@ struct drm_prime_handle {
700 700
701struct drm_syncobj_create { 701struct drm_syncobj_create {
702 __u32 handle; 702 __u32 handle;
703#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
703 __u32 flags; 704 __u32 flags;
704}; 705};
705 706
@@ -718,6 +719,24 @@ struct drm_syncobj_handle {
718 __u32 pad; 719 __u32 pad;
719}; 720};
720 721
722#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
723#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
724struct drm_syncobj_wait {
725 __u64 handles;
726 /* absolute timeout */
727 __s64 timeout_nsec;
728 __u32 count_handles;
729 __u32 flags;
730 __u32 first_signaled; /* only valid when not waiting all */
731 __u32 pad;
732};
733
734struct drm_syncobj_array {
735 __u64 handles;
736 __u32 count_handles;
737 __u32 pad;
738};
739
721#if defined(__cplusplus) 740#if defined(__cplusplus)
722} 741}
723#endif 742#endif
@@ -840,6 +859,9 @@ extern "C" {
840#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) 859#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy)
841#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) 860#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle)
842#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) 861#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle)
862#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait)
863#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array)
864#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array)
843 865
844/** 866/**
845 * Device specific ioctls should only be in their respective headers 867 * Device specific ioctls should only be in their respective headers
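The new drm_syncobj_wait ioctl above takes a user pointer (packed into a __u64) to an array of syncobj handles, an absolute timeout in nanoseconds, and the WAIT_ALL / WAIT_FOR_SUBMIT flags. A hedged userspace sketch, assuming fd is an open DRM device node, h0/h1 are valid syncobj handles, the header is reachable as <drm/drm.h> (libdrm install path), and the absolute timeout base is CLOCK_MONOTONIC:

#include <stdint.h>
#include <string.h>
#include <time.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

/* Wait up to ~1s for either of two syncobjs to signal. */
static int wait_any(int fd, uint32_t h0, uint32_t h1)
{
	uint32_t handles[2] = { h0, h1 };
	struct timespec now;
	struct drm_syncobj_wait wait;

	memset(&wait, 0, sizeof(wait));
	clock_gettime(CLOCK_MONOTONIC, &now);
	wait.handles = (uint64_t)(uintptr_t)handles;
	wait.count_handles = 2;
	wait.timeout_nsec = (int64_t)now.tv_sec * 1000000000ll +
			    now.tv_nsec + 1000000000ll;
	wait.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;	/* not WAIT_ALL */

	/* wait.first_signaled reports which handle fired when not waiting all */
	return ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
}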
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 7ccbd6a2bbe0..6598fb76d2c2 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -260,6 +260,8 @@ typedef struct _drm_i915_sarea {
260#define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 260#define DRM_I915_GEM_CONTEXT_GETPARAM 0x34
261#define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 261#define DRM_I915_GEM_CONTEXT_SETPARAM 0x35
262#define DRM_I915_PERF_OPEN 0x36 262#define DRM_I915_PERF_OPEN 0x36
263#define DRM_I915_PERF_ADD_CONFIG 0x37
264#define DRM_I915_PERF_REMOVE_CONFIG 0x38
263 265
264#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) 266#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
265#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) 267#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -315,6 +317,8 @@ typedef struct _drm_i915_sarea {
315#define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) 317#define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param)
316#define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) 318#define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param)
317#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) 319#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
320#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
321#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
318 322
319/* Allow drivers to submit batchbuffers directly to hardware, relying 323/* Allow drivers to submit batchbuffers directly to hardware, relying
320 * on the security mechanisms provided by hardware. 324 * on the security mechanisms provided by hardware.
@@ -431,6 +435,11 @@ typedef struct drm_i915_irq_wait {
431 */ 435 */
432#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 436#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48
433 437
438/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
439 * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY.
440 */
441#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49
442
434typedef struct drm_i915_getparam { 443typedef struct drm_i915_getparam {
435 __s32 param; 444 __s32 param;
436 /* 445 /*
@@ -812,6 +821,17 @@ struct drm_i915_gem_exec_object2 {
812 __u64 rsvd2; 821 __u64 rsvd2;
813}; 822};
814 823
824struct drm_i915_gem_exec_fence {
825 /**
826 * User's handle for a drm_syncobj to wait on or signal.
827 */
828 __u32 handle;
829
830#define I915_EXEC_FENCE_WAIT (1<<0)
831#define I915_EXEC_FENCE_SIGNAL (1<<1)
832 __u32 flags;
833};
834
815struct drm_i915_gem_execbuffer2 { 835struct drm_i915_gem_execbuffer2 {
816 /** 836 /**
817 * List of gem_exec_object2 structs 837 * List of gem_exec_object2 structs
@@ -826,7 +846,11 @@ struct drm_i915_gem_execbuffer2 {
826 __u32 DR1; 846 __u32 DR1;
827 __u32 DR4; 847 __u32 DR4;
828 __u32 num_cliprects; 848 __u32 num_cliprects;
829 /** This is a struct drm_clip_rect *cliprects */ 849 /**
850 * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
851 * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a
852 * struct drm_i915_gem_exec_fence *fences.
853 */
830 __u64 cliprects_ptr; 854 __u64 cliprects_ptr;
831#define I915_EXEC_RING_MASK (7<<0) 855#define I915_EXEC_RING_MASK (7<<0)
832#define I915_EXEC_DEFAULT (0<<0) 856#define I915_EXEC_DEFAULT (0<<0)
@@ -927,7 +951,14 @@ struct drm_i915_gem_execbuffer2 {
927 * element). 951 * element).
928 */ 952 */
929#define I915_EXEC_BATCH_FIRST (1<<18) 953#define I915_EXEC_BATCH_FIRST (1<<18)
930#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1)) 954
955/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr
956 * define an array of i915_gem_exec_fence structures which specify a set of
957 * dma fences to wait upon or signal.
958 */
959#define I915_EXEC_FENCE_ARRAY (1<<19)
960
961#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
931 962
932#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) 963#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
933#define i915_execbuffer2_set_context_id(eb2, context) \ 964#define i915_execbuffer2_set_context_id(eb2, context) \
@@ -1467,6 +1498,22 @@ enum drm_i915_perf_record_type {
1467 DRM_I915_PERF_RECORD_MAX /* non-ABI */ 1498 DRM_I915_PERF_RECORD_MAX /* non-ABI */
1468}; 1499};
1469 1500
1501/**
1502 * Structure to upload perf dynamic configuration into the kernel.
1503 */
1504struct drm_i915_perf_oa_config {
1505 /** String formatted like "%08x-%04x-%04x-%04x-%012x" */
1506 char uuid[36];
1507
1508 __u32 n_mux_regs;
1509 __u32 n_boolean_regs;
1510 __u32 n_flex_regs;
1511
1512 __u64 __user mux_regs_ptr;
1513 __u64 __user boolean_regs_ptr;
1514 __u64 __user flex_regs_ptr;
1515};
1516
1470#if defined(__cplusplus) 1517#if defined(__cplusplus)
1471} 1518}
1472#endif 1519#endif
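With the new I915_EXEC_FENCE_ARRAY flag, num_cliprects and cliprects_ptr are reinterpreted as a count of and pointer to drm_i915_gem_exec_fence entries, each naming a syncobj to wait on or signal; I915_PARAM_HAS_EXEC_FENCE_ARRAY reports whether the kernel supports it. A hedged sketch of wiring two fences into an otherwise fully populated execbuffer:

#include <stdint.h>
#include <drm/i915_drm.h>

/* eb is assumed to be an otherwise fully populated execbuffer; wait_handle
 * and signal_handle are assumed to be existing drm_syncobj handles. */
static void attach_fences(struct drm_i915_gem_execbuffer2 *eb,
			  struct drm_i915_gem_exec_fence fences[2],
			  uint32_t wait_handle, uint32_t signal_handle)
{
	fences[0].handle = wait_handle;
	fences[0].flags  = I915_EXEC_FENCE_WAIT;
	fences[1].handle = signal_handle;
	fences[1].flags  = I915_EXEC_FENCE_SIGNAL;

	eb->flags |= I915_EXEC_FENCE_ARRAY;
	eb->cliprects_ptr = (uint64_t)(uintptr_t)fences;	/* reused as fence array */
	eb->num_cliprects = 2;					/* number of fences */
}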
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 461811e57140..43ab5c402f98 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -143,12 +143,6 @@ enum bpf_attach_type {
143 143
144#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE 144#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
145 145
146enum bpf_sockmap_flags {
147 BPF_SOCKMAP_UNSPEC,
148 BPF_SOCKMAP_STRPARSER,
149 __MAX_BPF_SOCKMAP_FLAG
150};
151
152/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command 146/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
153 * to the given target_fd cgroup the descendent cgroup will be able to 147 * to the given target_fd cgroup the descendent cgroup will be able to
154 * override effective bpf program that was inherited from this cgroup 148 * override effective bpf program that was inherited from this cgroup
@@ -368,9 +362,20 @@ union bpf_attr {
368 * int bpf_redirect(ifindex, flags) 362 * int bpf_redirect(ifindex, flags)
369 * redirect to another netdev 363 * redirect to another netdev
370 * @ifindex: ifindex of the net device 364 * @ifindex: ifindex of the net device
371 * @flags: bit 0 - if set, redirect to ingress instead of egress 365 * @flags:
372 * other bits - reserved 366 * cls_bpf:
373 * Return: TC_ACT_REDIRECT 367 * bit 0 - if set, redirect to ingress instead of egress
368 * other bits - reserved
369 * xdp_bpf:
370 * all bits - reserved
371 * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
372 * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
373 * int bpf_redirect_map(map, key, flags)
374 * redirect to endpoint in map
375 * @map: pointer to dev map
376 * @key: index in map to lookup
377 * @flags: --
378 * Return: XDP_REDIRECT on success or XDP_ABORT on error
374 * 379 *
375 * u32 bpf_get_route_realm(skb) 380 * u32 bpf_get_route_realm(skb)
376 * retrieve a dst's tclassid 381 * retrieve a dst's tclassid
@@ -632,7 +637,7 @@ union bpf_attr {
632 FN(skb_adjust_room), \ 637 FN(skb_adjust_room), \
633 FN(redirect_map), \ 638 FN(redirect_map), \
634 FN(sk_redirect_map), \ 639 FN(sk_redirect_map), \
635 FN(sock_map_update), 640 FN(sock_map_update), \
636 641
637/* integer value in 'imm' field of BPF_CALL instruction selects which helper 642/* integer value in 'imm' field of BPF_CALL instruction selects which helper
638 * function eBPF program intends to call 643 * function eBPF program intends to call
@@ -753,20 +758,23 @@ struct bpf_sock {
753 __u32 family; 758 __u32 family;
754 __u32 type; 759 __u32 type;
755 __u32 protocol; 760 __u32 protocol;
761 __u32 mark;
762 __u32 priority;
756}; 763};
757 764
758#define XDP_PACKET_HEADROOM 256 765#define XDP_PACKET_HEADROOM 256
759 766
760/* User return codes for XDP prog type. 767/* User return codes for XDP prog type.
761 * A valid XDP program must return one of these defined values. All other 768 * A valid XDP program must return one of these defined values. All other
762 * return codes are reserved for future use. Unknown return codes will result 769 * return codes are reserved for future use. Unknown return codes will
763 * in packet drop. 770 * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
764 */ 771 */
765enum xdp_action { 772enum xdp_action {
766 XDP_ABORTED = 0, 773 XDP_ABORTED = 0,
767 XDP_DROP, 774 XDP_DROP,
768 XDP_PASS, 775 XDP_PASS,
769 XDP_TX, 776 XDP_TX,
777 XDP_REDIRECT,
770}; 778};
771 779
772/* user accessible metadata for XDP packet hook 780/* user accessible metadata for XDP packet hook
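The bpf.h update above documents the new bpf_redirect_map() helper and the XDP_REDIRECT return code. A minimal restricted-C XDP sketch, assuming a BPF_MAP_TYPE_DEVMAP named tx_port populated from user space, a clang -target bpf build, and bpf_helpers.h from libbpf (the BTF-style map syntax shown is today's convention, not part of this patch):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
	__uint(max_entries, 64);
} tx_port SEC(".maps");

SEC("xdp")
int xdp_redirect_prog(struct xdp_md *ctx)
{
	__u32 port = 0;		/* index into tx_port, filled by user space */

	/* Returns XDP_REDIRECT on success, per the documentation above. */
	return bpf_redirect_map(&tx_port, port, 0);
}

char _license[] SEC("license") = "GPL";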
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 6cd63c18708a..838887587411 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size {
711struct kvm_ppc_smmu_info { 711struct kvm_ppc_smmu_info {
712 __u64 flags; 712 __u64 flags;
713 __u32 slb_size; 713 __u32 slb_size;
714 __u32 pad; 714 __u16 data_keys; /* # storage keys supported for data */
715 __u16 instr_keys; /* # storage keys supported for instructions */
715 struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; 716 struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
716}; 717};
717 718
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index 81d8edf11789..a937480d7cd3 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -1,7 +1,8 @@
1#ifndef _UAPI_LINUX_MMAN_H 1#ifndef _UAPI_LINUX_MMAN_H
2#define _UAPI_LINUX_MMAN_H 2#define _UAPI_LINUX_MMAN_H
3 3
4#include <uapi/asm/mman.h> 4#include <asm/mman.h>
5#include <asm-generic/hugetlb_encode.h>
5 6
6#define MREMAP_MAYMOVE 1 7#define MREMAP_MAYMOVE 1
7#define MREMAP_FIXED 2 8#define MREMAP_FIXED 2
@@ -10,4 +11,25 @@
10#define OVERCOMMIT_ALWAYS 1 11#define OVERCOMMIT_ALWAYS 1
11#define OVERCOMMIT_NEVER 2 12#define OVERCOMMIT_NEVER 2
12 13
14/*
15 * Huge page size encoding when MAP_HUGETLB is specified, and a huge page
16 * size other than the default is desired. See hugetlb_encode.h.
17 * All known huge page size encodings are provided here. It is the
18 * responsibility of the application to know which sizes are supported on
19 * the running system. See mmap(2) man page for details.
20 */
21#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
22#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK
23
24#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB
25#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB
26#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB
27#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
28#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
29#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
30#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
31#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
32#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
33#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
34
13#endif /* _UAPI_LINUX_MMAN_H */ 35#endif /* _UAPI_LINUX_MMAN_H */
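These MAP_HUGE_* values are OR'd into the mmap(2) flags together with MAP_HUGETLB to pick an explicit page size. A hedged usage sketch, assuming 2 MB pages are reserved in the hugetlb pool and that MAP_HUGE_2MB is visible via <sys/mman.h> (older libcs may need <linux/mman.h>):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 2 * 1024 * 1024;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB,
		       -1, 0);

	if (p == MAP_FAILED) {
		/* No 2 MB pages available; caller should fall back to normal pages. */
		perror("mmap(MAP_HUGETLB | MAP_HUGE_2MB)");
		return 1;
	}
	munmap(p, len);
	return 0;
}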
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 0f22768c0d4d..34a579f806e3 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -284,11 +284,16 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
284 case 0x8d: 284 case 0x8d:
285 if (sib == 0x24 && rex_w && !rex_b && !rex_x) { 285 if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
286 286
287 /* lea disp(%rsp), reg */
288 *type = INSN_STACK; 287 *type = INSN_STACK;
289 op->src.type = OP_SRC_ADD; 288 if (!insn.displacement.value) {
289 /* lea (%rsp), reg */
290 op->src.type = OP_SRC_REG;
291 } else {
292 /* lea disp(%rsp), reg */
293 op->src.type = OP_SRC_ADD;
294 op->src.offset = insn.displacement.value;
295 }
290 op->src.reg = CFI_SP; 296 op->src.reg = CFI_SP;
291 op->src.offset = insn.displacement.value;
292 op->dest.type = OP_DEST_REG; 297 op->dest.type = OP_DEST_REG;
293 op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; 298 op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
294 299
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 62072822dc85..627b7cada144 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,34 +1,8 @@
1tools/perf 1tools/perf
2tools/arch/alpha/include/asm/barrier.h 2tools/arch
3tools/arch/arm/include/asm/barrier.h
4tools/arch/arm64/include/asm/barrier.h
5tools/arch/ia64/include/asm/barrier.h
6tools/arch/mips/include/asm/barrier.h
7tools/arch/powerpc/include/asm/barrier.h
8tools/arch/s390/include/asm/barrier.h
9tools/arch/sh/include/asm/barrier.h
10tools/arch/sparc/include/asm/barrier.h
11tools/arch/sparc/include/asm/barrier_32.h
12tools/arch/sparc/include/asm/barrier_64.h
13tools/arch/tile/include/asm/barrier.h
14tools/arch/x86/include/asm/barrier.h
15tools/arch/x86/include/asm/cmpxchg.h
16tools/arch/x86/include/asm/cpufeatures.h
17tools/arch/x86/include/asm/disabled-features.h
18tools/arch/x86/include/asm/required-features.h
19tools/arch/x86/include/uapi/asm/svm.h
20tools/arch/x86/include/uapi/asm/vmx.h
21tools/arch/x86/include/uapi/asm/kvm.h
22tools/arch/x86/include/uapi/asm/kvm_perf.h
23tools/arch/x86/lib/memcpy_64.S
24tools/arch/x86/lib/memset_64.S
25tools/arch/s390/include/uapi/asm/kvm_perf.h
26tools/arch/s390/include/uapi/asm/sie.h
27tools/arch/xtensa/include/asm/barrier.h
28tools/scripts 3tools/scripts
29tools/build 4tools/build
30tools/arch/x86/include/asm/atomic.h 5tools/include
31tools/arch/x86/include/asm/rmwcc.h
32tools/lib/traceevent 6tools/lib/traceevent
33tools/lib/api 7tools/lib/api
34tools/lib/bpf 8tools/lib/bpf
@@ -42,60 +16,3 @@ tools/lib/find_bit.c
42tools/lib/bitmap.c 16tools/lib/bitmap.c
43tools/lib/str_error_r.c 17tools/lib/str_error_r.c
44tools/lib/vsprintf.c 18tools/lib/vsprintf.c
45tools/include/asm/alternative-asm.h
46tools/include/asm/atomic.h
47tools/include/asm/barrier.h
48tools/include/asm/bug.h
49tools/include/asm-generic/atomic-gcc.h
50tools/include/asm-generic/barrier.h
51tools/include/asm-generic/bitops/arch_hweight.h
52tools/include/asm-generic/bitops/atomic.h
53tools/include/asm-generic/bitops/const_hweight.h
54tools/include/asm-generic/bitops/__ffs.h
55tools/include/asm-generic/bitops/__ffz.h
56tools/include/asm-generic/bitops/__fls.h
57tools/include/asm-generic/bitops/find.h
58tools/include/asm-generic/bitops/fls64.h
59tools/include/asm-generic/bitops/fls.h
60tools/include/asm-generic/bitops/hweight.h
61tools/include/asm-generic/bitops.h
62tools/include/linux/atomic.h
63tools/include/linux/bitops.h
64tools/include/linux/compiler.h
65tools/include/linux/compiler-gcc.h
66tools/include/linux/coresight-pmu.h
67tools/include/linux/bug.h
68tools/include/linux/filter.h
69tools/include/linux/hash.h
70tools/include/linux/kernel.h
71tools/include/linux/list.h
72tools/include/linux/log2.h
73tools/include/uapi/asm-generic/fcntl.h
74tools/include/uapi/asm-generic/ioctls.h
75tools/include/uapi/asm-generic/mman-common.h
76tools/include/uapi/asm-generic/mman.h
77tools/include/uapi/drm/drm.h
78tools/include/uapi/drm/i915_drm.h
79tools/include/uapi/linux/bpf.h
80tools/include/uapi/linux/bpf_common.h
81tools/include/uapi/linux/fcntl.h
82tools/include/uapi/linux/hw_breakpoint.h
83tools/include/uapi/linux/kvm.h
84tools/include/uapi/linux/mman.h
85tools/include/uapi/linux/perf_event.h
86tools/include/uapi/linux/sched.h
87tools/include/uapi/linux/stat.h
88tools/include/uapi/linux/vhost.h
89tools/include/uapi/sound/asound.h
90tools/include/linux/poison.h
91tools/include/linux/rbtree.h
92tools/include/linux/rbtree_augmented.h
93tools/include/linux/refcount.h
94tools/include/linux/string.h
95tools/include/linux/stringify.h
96tools/include/linux/types.h
97tools/include/linux/err.h
98tools/include/linux/bitmap.h
99tools/include/linux/time64.h
100tools/arch/*/include/uapi/asm/mman.h
101tools/arch/*/include/uapi/asm/perf_regs.h
diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build
index bd518b623d7a..5bd7b9260cc0 100644
--- a/tools/perf/arch/s390/util/Build
+++ b/tools/perf/arch/s390/util/Build
@@ -1,5 +1,4 @@
1libperf-y += header.o 1libperf-y += header.o
2libperf-y += sym-handling.o
3libperf-y += kvm-stat.o 2libperf-y += kvm-stat.o
4 3
5libperf-$(CONFIG_DWARF) += dwarf-regs.o 4libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/s390/util/sym-handling.c b/tools/perf/arch/s390/util/sym-handling.c
deleted file mode 100644
index e103f6e46afe..000000000000
--- a/tools/perf/arch/s390/util/sym-handling.c
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Architecture specific ELF symbol handling and relocation mapping.
3 *
4 * Copyright 2017 IBM Corp.
5 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License (version 2 only)
9 * as published by the Free Software Foundation.
10 */
11
12#include "symbol.h"
13
14#ifdef HAVE_LIBELF_SUPPORT
15bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
16{
17 if (ehdr.e_type == ET_EXEC)
18 return false;
19 return ehdr.e_type == ET_REL || ehdr.e_type == ET_DYN;
20}
21
22void arch__adjust_sym_map_offset(GElf_Sym *sym,
23 GElf_Shdr *shdr __maybe_unused,
24 struct map *map)
25{
26 if (map->type == MAP__FUNCTION)
27 sym->st_value += map->start;
28}
29#endif
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 510b513e0f01..be09d77cade0 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -65,8 +65,6 @@ static int parse_callchain_mode(const char *value)
65 callchain_param.mode = CHAIN_FOLDED; 65 callchain_param.mode = CHAIN_FOLDED;
66 return 0; 66 return 0;
67 } 67 }
68
69 pr_err("Invalid callchain mode: %s\n", value);
70 return -1; 68 return -1;
71} 69}
72 70
@@ -82,8 +80,6 @@ static int parse_callchain_order(const char *value)
82 callchain_param.order_set = true; 80 callchain_param.order_set = true;
83 return 0; 81 return 0;
84 } 82 }
85
86 pr_err("Invalid callchain order: %s\n", value);
87 return -1; 83 return -1;
88} 84}
89 85
@@ -105,8 +101,6 @@ static int parse_callchain_sort_key(const char *value)
105 callchain_param.branch_callstack = 1; 101 callchain_param.branch_callstack = 1;
106 return 0; 102 return 0;
107 } 103 }
108
109 pr_err("Invalid callchain sort key: %s\n", value);
110 return -1; 104 return -1;
111} 105}
112 106
@@ -124,8 +118,6 @@ static int parse_callchain_value(const char *value)
124 callchain_param.value = CCVAL_COUNT; 118 callchain_param.value = CCVAL_COUNT;
125 return 0; 119 return 0;
126 } 120 }
127
128 pr_err("Invalid callchain config key: %s\n", value);
129 return -1; 121 return -1;
130} 122}
131 123
@@ -319,12 +311,27 @@ int perf_callchain_config(const char *var, const char *value)
319 311
320 return ret; 312 return ret;
321 } 313 }
322 if (!strcmp(var, "print-type")) 314 if (!strcmp(var, "print-type")){
323 return parse_callchain_mode(value); 315 int ret;
324 if (!strcmp(var, "order")) 316 ret = parse_callchain_mode(value);
325 return parse_callchain_order(value); 317 if (ret == -1)
326 if (!strcmp(var, "sort-key")) 318 pr_err("Invalid callchain mode: %s\n", value);
327 return parse_callchain_sort_key(value); 319 return ret;
320 }
321 if (!strcmp(var, "order")){
322 int ret;
323 ret = parse_callchain_order(value);
324 if (ret == -1)
325 pr_err("Invalid callchain order: %s\n", value);
326 return ret;
327 }
328 if (!strcmp(var, "sort-key")){
329 int ret;
330 ret = parse_callchain_sort_key(value);
331 if (ret == -1)
332 pr_err("Invalid callchain sort key: %s\n", value);
333 return ret;
334 }
328 if (!strcmp(var, "threshold")) { 335 if (!strcmp(var, "threshold")) {
329 callchain_param.min_percent = strtod(value, &endptr); 336 callchain_param.min_percent = strtod(value, &endptr);
330 if (value == endptr) { 337 if (value == endptr) {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4bb89373eb52..0dccdb89572c 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -271,12 +271,17 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
271 return evsel; 271 return evsel;
272} 272}
273 273
274static bool perf_event_can_profile_kernel(void)
275{
276 return geteuid() == 0 || perf_event_paranoid() == -1;
277}
278
274struct perf_evsel *perf_evsel__new_cycles(bool precise) 279struct perf_evsel *perf_evsel__new_cycles(bool precise)
275{ 280{
276 struct perf_event_attr attr = { 281 struct perf_event_attr attr = {
277 .type = PERF_TYPE_HARDWARE, 282 .type = PERF_TYPE_HARDWARE,
278 .config = PERF_COUNT_HW_CPU_CYCLES, 283 .config = PERF_COUNT_HW_CPU_CYCLES,
279 .exclude_kernel = geteuid() != 0, 284 .exclude_kernel = !perf_event_can_profile_kernel(),
280 }; 285 };
281 struct perf_evsel *evsel; 286 struct perf_evsel *evsel;
282 287
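perf_event_can_profile_kernel() above permits kernel-space sampling for root, or for any user when perf_event_paranoid is -1. A hedged userspace equivalent that reads the sysctl directly (perf's own perf_event_paranoid() helper consults the same file; 2 is used here only as a conservative fallback when the file cannot be read):

#include <stdio.h>
#include <unistd.h>

static int can_profile_kernel(void)
{
	int paranoid = 2;	/* conservative fallback if the file is unreadable */
	FILE *fp = fopen("/proc/sys/kernel/perf_event_paranoid", "r");

	if (fp) {
		if (fscanf(fp, "%d", &paranoid) != 1)
			paranoid = 2;
		fclose(fp);
	}
	return geteuid() == 0 || paranoid == -1;
}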
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 5c39f420111e..9cf781f0d8a2 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -810,12 +810,6 @@ static u64 ref_reloc(struct kmap *kmap)
810void __weak arch__sym_update(struct symbol *s __maybe_unused, 810void __weak arch__sym_update(struct symbol *s __maybe_unused,
811 GElf_Sym *sym __maybe_unused) { } 811 GElf_Sym *sym __maybe_unused) { }
812 812
813void __weak arch__adjust_sym_map_offset(GElf_Sym *sym, GElf_Shdr *shdr,
814 struct map *map __maybe_unused)
815{
816 sym->st_value -= shdr->sh_addr - shdr->sh_offset;
817}
818
819int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, 813int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
820 struct symsrc *runtime_ss, int kmodule) 814 struct symsrc *runtime_ss, int kmodule)
821{ 815{
@@ -996,7 +990,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
996 990
997 /* Adjust symbol to map to file offset */ 991 /* Adjust symbol to map to file offset */
998 if (adjust_kernel_syms) 992 if (adjust_kernel_syms)
999 arch__adjust_sym_map_offset(&sym, &shdr, map); 993 sym.st_value -= shdr.sh_addr - shdr.sh_offset;
1000 994
1001 if (strcmp(section_name, 995 if (strcmp(section_name,
1002 (curr_dso->short_name + 996 (curr_dso->short_name +
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 2bd6a1f01a1c..aad99e7e179b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -344,9 +344,6 @@ int setup_intlist(struct intlist **list, const char *list_str,
344#ifdef HAVE_LIBELF_SUPPORT 344#ifdef HAVE_LIBELF_SUPPORT
345bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); 345bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
346void arch__sym_update(struct symbol *s, GElf_Sym *sym); 346void arch__sym_update(struct symbol *s, GElf_Sym *sym);
347void arch__adjust_sym_map_offset(GElf_Sym *sym,
348 GElf_Shdr *shdr __maybe_unused,
349 struct map *map __maybe_unused);
350#endif 347#endif
351 348
352#define SYMBOL_A 0 349#define SYMBOL_A 0
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 19e5db90394c..6eea7cff3d4e 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -15,9 +15,9 @@
15 15
16#include "syscalltbl.h" 16#include "syscalltbl.h"
17#include <stdlib.h> 17#include <stdlib.h>
18#include <linux/compiler.h>
18 19
19#ifdef HAVE_SYSCALL_TABLE 20#ifdef HAVE_SYSCALL_TABLE
20#include <linux/compiler.h>
21#include <string.h> 21#include <string.h>
22#include "string2.h" 22#include "string2.h"
23#include "util.h" 23#include "util.h"
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 26ce4f7168be..ff805643b5f7 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -52,6 +52,10 @@ override LDFLAGS =
52override MAKEFLAGS = 52override MAKEFLAGS =
53endif 53endif
54 54
55ifneq ($(KBUILD_SRC),)
56override LDFLAGS =
57endif
58
55BUILD := $(O) 59BUILD := $(O)
56ifndef BUILD 60ifndef BUILD
57 BUILD := $(KBUILD_OUTPUT) 61 BUILD := $(KBUILD_OUTPUT)
@@ -62,32 +66,32 @@ endif
62 66
63export BUILD 67export BUILD
64all: 68all:
65 for TARGET in $(TARGETS); do \ 69 @for TARGET in $(TARGETS); do \
66 BUILD_TARGET=$$BUILD/$$TARGET; \ 70 BUILD_TARGET=$$BUILD/$$TARGET; \
67 mkdir $$BUILD_TARGET -p; \ 71 mkdir $$BUILD_TARGET -p; \
68 make OUTPUT=$$BUILD_TARGET -C $$TARGET;\ 72 make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
69 done; 73 done;
70 74
71run_tests: all 75run_tests: all
72 for TARGET in $(TARGETS); do \ 76 @for TARGET in $(TARGETS); do \
73 BUILD_TARGET=$$BUILD/$$TARGET; \ 77 BUILD_TARGET=$$BUILD/$$TARGET; \
74 make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\ 78 make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
75 done; 79 done;
76 80
77hotplug: 81hotplug:
78 for TARGET in $(TARGETS_HOTPLUG); do \ 82 @for TARGET in $(TARGETS_HOTPLUG); do \
79 BUILD_TARGET=$$BUILD/$$TARGET; \ 83 BUILD_TARGET=$$BUILD/$$TARGET; \
80 make OUTPUT=$$BUILD_TARGET -C $$TARGET;\ 84 make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
81 done; 85 done;
82 86
83run_hotplug: hotplug 87run_hotplug: hotplug
84 for TARGET in $(TARGETS_HOTPLUG); do \ 88 @for TARGET in $(TARGETS_HOTPLUG); do \
85 BUILD_TARGET=$$BUILD/$$TARGET; \ 89 BUILD_TARGET=$$BUILD/$$TARGET; \
86 make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\ 90 make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
87 done; 91 done;
88 92
89clean_hotplug: 93clean_hotplug:
90 for TARGET in $(TARGETS_HOTPLUG); do \ 94 @for TARGET in $(TARGETS_HOTPLUG); do \
91 BUILD_TARGET=$$BUILD/$$TARGET; \ 95 BUILD_TARGET=$$BUILD/$$TARGET; \
92 make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ 96 make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
93 done; 97 done;
@@ -103,7 +107,7 @@ install:
103ifdef INSTALL_PATH 107ifdef INSTALL_PATH
104 @# Ask all targets to install their files 108 @# Ask all targets to install their files
105 mkdir -p $(INSTALL_PATH) 109 mkdir -p $(INSTALL_PATH)
106 for TARGET in $(TARGETS); do \ 110 @for TARGET in $(TARGETS); do \
107 BUILD_TARGET=$$BUILD/$$TARGET; \ 111 BUILD_TARGET=$$BUILD/$$TARGET; \
108 make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ 112 make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
109 done; 113 done;
@@ -128,7 +132,7 @@ else
128endif 132endif
129 133
130clean: 134clean:
131 for TARGET in $(TARGETS); do \ 135 @for TARGET in $(TARGETS); do \
132 BUILD_TARGET=$$BUILD/$$TARGET; \ 136 BUILD_TARGET=$$BUILD/$$TARGET; \
133 make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ 137 make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
134 done; 138 done;
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 20ecbaa0d85d..6c53a8906eff 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -12,6 +12,7 @@ static inline unsigned int bpf_num_possible_cpus(void)
12 unsigned int start, end, possible_cpus = 0; 12 unsigned int start, end, possible_cpus = 0;
13 char buff[128]; 13 char buff[128];
14 FILE *fp; 14 FILE *fp;
15 int n;
15 16
16 fp = fopen(fcpu, "r"); 17 fp = fopen(fcpu, "r");
17 if (!fp) { 18 if (!fp) {
@@ -20,17 +21,17 @@ static inline unsigned int bpf_num_possible_cpus(void)
20 } 21 }
21 22
22 while (fgets(buff, sizeof(buff), fp)) { 23 while (fgets(buff, sizeof(buff), fp)) {
23 if (sscanf(buff, "%u-%u", &start, &end) == 2) { 24 n = sscanf(buff, "%u-%u", &start, &end);
24 possible_cpus = start == 0 ? end + 1 : 0; 25 if (n == 0) {
25 break; 26 printf("Failed to retrieve # possible CPUs!\n");
27 exit(1);
28 } else if (n == 1) {
29 end = start;
26 } 30 }
31 possible_cpus = start == 0 ? end + 1 : 0;
32 break;
27 } 33 }
28
29 fclose(fp); 34 fclose(fp);
30 if (!possible_cpus) {
31 printf("Failed to retrieve # possible CPUs!\n");
32 exit(1);
33 }
34 35
35 return possible_cpus; 36 return possible_cpus;
36} 37}
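The bpf_util.h change above makes the possible-CPU parser accept both the ranged form "0-7" and the single-CPU form "0". A standalone illustration of the same parsing, assuming the real helper keeps reading /sys/devices/system/cpu/possible:

#include <stdio.h>

static unsigned int parse_possible(const char *line)
{
	unsigned int start, end;
	int n = sscanf(line, "%u-%u", &start, &end);

	if (n == 0)
		return 0;		/* unparseable */
	if (n == 1)
		end = start;		/* "0" means a single CPU */
	return start == 0 ? end + 1 : 0;
}

int main(void)
{
	printf("%u %u\n", parse_possible("0-7"), parse_possible("0"));	/* prints: 8 1 */
	return 0;
}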
diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
index 6b214b7b10fb..247b0a1899d7 100644
--- a/tools/testing/selftests/breakpoints/Makefile
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -2,14 +2,14 @@
2uname_M := $(shell uname -m 2>/dev/null || echo not) 2uname_M := $(shell uname -m 2>/dev/null || echo not)
3ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) 3ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
4 4
5TEST_GEN_PROGS := step_after_suspend_test
6
5ifeq ($(ARCH),x86) 7ifeq ($(ARCH),x86)
6TEST_GEN_PROGS := breakpoint_test 8TEST_GEN_PROGS += breakpoint_test
7endif 9endif
8ifneq (,$(filter $(ARCH),aarch64 arm64)) 10ifneq (,$(filter $(ARCH),aarch64 arm64))
9TEST_GEN_PROGS := breakpoint_test_arm64 11TEST_GEN_PROGS += breakpoint_test_arm64
10endif 12endif
11 13
12TEST_GEN_PROGS += step_after_suspend_test
13
14include ../lib.mk 14include ../lib.mk
15 15
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index 2a1cb9908746..a4fd4c851a5b 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -1,6 +1,8 @@
1#!/bin/sh 1#!/bin/sh
2# description: Register/unregister many kprobe events 2# description: Register/unregister many kprobe events
3 3
4[ -f kprobe_events ] || exit_unsupported # this is configurable
5
4# ftrace fentry skip size depends on the machine architecture. 6# ftrace fentry skip size depends on the machine architecture.
5# Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le 7# Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le
6case `uname -m` in 8case `uname -m` in
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 7c647f619d63..f0c0369ccb79 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -7,14 +7,17 @@ TEST_PROGS := run.sh
7include ../lib.mk 7include ../lib.mk
8 8
9all: 9all:
10 for DIR in $(SUBDIRS); do \ 10 @for DIR in $(SUBDIRS); do \
11 BUILD_TARGET=$(OUTPUT)/$$DIR; \ 11 BUILD_TARGET=$(OUTPUT)/$$DIR; \
12 mkdir $$BUILD_TARGET -p; \ 12 mkdir $$BUILD_TARGET -p; \
13 make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ 13 make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
14 if [ -e $$DIR/$(TEST_PROGS) ]; then
15 rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/;
16 fi
14 done 17 done
15 18
16override define RUN_TESTS 19override define RUN_TESTS
17 $(OUTPUT)/run.sh 20 @cd $(OUTPUT); ./run.sh
18endef 21endef
19 22
20override define INSTALL_RULE 23override define INSTALL_RULE
@@ -33,7 +36,7 @@ override define EMIT_TESTS
33endef 36endef
34 37
35override define CLEAN 38override define CLEAN
36 for DIR in $(SUBDIRS); do \ 39 @for DIR in $(SUBDIRS); do \
37 BUILD_TARGET=$(OUTPUT)/$$DIR; \ 40 BUILD_TARGET=$(OUTPUT)/$$DIR; \
38 mkdir $$BUILD_TARGET -p; \ 41 mkdir $$BUILD_TARGET -p; \
39 make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ 42 make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
index 849a90ffe8dd..a97e24edde39 100644
--- a/tools/testing/selftests/intel_pstate/Makefile
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -1,7 +1,9 @@
1CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE 1CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
2LDLIBS := $(LDLIBS) -lm 2LDLIBS := $(LDLIBS) -lm
3 3
4ifeq (,$(filter $(ARCH),x86))
4TEST_GEN_FILES := msr aperf 5TEST_GEN_FILES := msr aperf
6endif
5 7
6TEST_PROGS := run.sh 8TEST_PROGS := run.sh
7 9
diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
index 7868c106b8b1..d3ab48f91cd6 100755
--- a/tools/testing/selftests/intel_pstate/run.sh
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -29,13 +29,12 @@
29 29
30EVALUATE_ONLY=0 30EVALUATE_ONLY=0
31 31
32max_cpus=$(($(nproc)-1)) 32if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then
33 echo "$0 # Skipped: Test can only run on x86 architectures."
34 exit 0
35fi
33 36
34# compile programs 37max_cpus=$(($(nproc)-1))
35gcc aperf.c -Wall -D_GNU_SOURCE -o aperf -lm
36[ $? -ne 0 ] && echo "Problem compiling aperf.c." && exit 1
37gcc -o msr msr.c -lm
38[ $? -ne 0 ] && echo "Problem compiling msr.c." && exit 1
39 38
40function run_test () { 39function run_test () {
41 40
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 693616651da5..f65886af7c0c 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -6,7 +6,14 @@ ifeq (0,$(MAKELEVEL))
6OUTPUT := $(shell pwd) 6OUTPUT := $(shell pwd)
7endif 7endif
8 8
9# The following are built by lib.mk common compile rules.
10# TEST_CUSTOM_PROGS should be used by tests that require
11# custom build rule and prevent common build rule use.
12# TEST_PROGS are for test shell scripts.
13# TEST_CUSTOM_PROGS and TEST_PROGS will be run by common run_tests
14# and install targets. Common clean doesn't touch them.
9TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) 15TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
16TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
10TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) 17TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
11 18
12all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) 19all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
@@ -20,17 +27,28 @@ define RUN_TESTS
20 test_num=`echo $$test_num+1 | bc`; \ 27 test_num=`echo $$test_num+1 | bc`; \
21 echo "selftests: $$BASENAME_TEST"; \ 28 echo "selftests: $$BASENAME_TEST"; \
22 echo "========================================"; \ 29 echo "========================================"; \
23 if [ ! -x $$BASENAME_TEST ]; then \ 30 if [ ! -x $$TEST ]; then \
24 echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\ 31 echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\
25 echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \ 32 echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \
26 else \ 33 else \
27 cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\ 34 cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
28 fi; \ 35 fi; \
29 done; 36 done;
30endef 37endef
31 38
32run_tests: all 39run_tests: all
33 $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_PROGS)) 40ifneq ($(KBUILD_SRC),)
41 @if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then
42 @rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT)
43 fi
44 @if [ "X$(TEST_PROGS)" != "X" ]; then
45 $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS))
46 else
47 $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS))
48 fi
49else
50 $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
51endif
34 52
35define INSTALL_RULE 53define INSTALL_RULE
36 @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ 54 @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \
@@ -38,10 +56,10 @@ define INSTALL_RULE
38 echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \ 56 echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \
39 rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \ 57 rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \
40 fi 58 fi
41 @if [ "X$(TEST_GEN_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \ 59 @if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \
42 mkdir -p ${INSTALL_PATH}; \ 60 mkdir -p ${INSTALL_PATH}; \
43 echo "rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \ 61 echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \
44 rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \ 62 rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \
45 fi 63 fi
46endef 64endef
47 65
@@ -53,15 +71,20 @@ else
53endif 71endif
54 72
55define EMIT_TESTS 73define EMIT_TESTS
56 @for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \ 74 @for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
57 BASENAME_TEST=`basename $$TEST`; \ 75 BASENAME_TEST=`basename $$TEST`; \
58 echo "(./$$BASENAME_TEST && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ 76 echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
59 done; 77 done;
60endef 78endef
61 79
62emit_tests: 80emit_tests:
63 $(EMIT_TESTS) 81 $(EMIT_TESTS)
64 82
83# define if isn't already. It is undefined in make O= case.
84ifeq ($(RM),)
85RM := rm -f
86endif
87
65define CLEAN 88define CLEAN
66 $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) 89 $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
67endef 90endef
@@ -69,6 +92,15 @@ endef
69clean: 92clean:
70 $(CLEAN) 93 $(CLEAN)
71 94
95# When make O= with kselftest target from main level
96# the following aren't defined.
97#
98ifneq ($(KBUILD_SRC),)
99LINK.c = $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
100COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c
101LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
102endif
103
72$(OUTPUT)/%:%.c 104$(OUTPUT)/%:%.c
73 $(LINK.c) $^ $(LDLIBS) -o $@ 105 $(LINK.c) $^ $(LDLIBS) -o $@
74 106
diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_tests.sh
index daabb350697c..daabb350697c 100644..100755
--- a/tools/testing/selftests/memfd/run_tests.sh
+++ b/tools/testing/selftests/memfd/run_tests.sh
diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
index 79a664aeb8d7..0f5e347b068d 100644
--- a/tools/testing/selftests/mqueue/Makefile
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -5,8 +5,8 @@ TEST_GEN_PROGS := mq_open_tests mq_perf_tests
5include ../lib.mk 5include ../lib.mk
6 6
7override define RUN_TESTS 7override define RUN_TESTS
8 @./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" 8 $(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
9 @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" 9 $(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
10endef 10endef
11 11
12override define EMIT_TESTS 12override define EMIT_TESTS
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 9801253e4802..c612d6e38c62 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -6,3 +6,4 @@ reuseport_bpf
6reuseport_bpf_cpu 6reuseport_bpf_cpu
7reuseport_bpf_numa 7reuseport_bpf_numa
8reuseport_dualstack 8reuseport_dualstack
9reuseaddr_conflict
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index de1f5772b878..d86bca991f45 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,9 +5,9 @@ CFLAGS += -I../../../../usr/include/
5 5
6TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh 6TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
7TEST_GEN_FILES = socket 7TEST_GEN_FILES = socket
8TEST_GEN_FILES += psock_fanout psock_tpacket 8TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
9TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa 9TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
10TEST_GEN_FILES += reuseport_dualstack msg_zerocopy 10TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
11 11
12include ../lib.mk 12include ../lib.mk
13 13
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 40232af5b023..3ab6ec403905 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -55,7 +55,7 @@
55#include <unistd.h> 55#include <unistd.h>
56 56
57#ifndef SO_EE_ORIGIN_ZEROCOPY 57#ifndef SO_EE_ORIGIN_ZEROCOPY
58#define SO_EE_ORIGIN_ZEROCOPY SO_EE_ORIGIN_UPAGE 58#define SO_EE_ORIGIN_ZEROCOPY 5
59#endif 59#endif
60 60
61#ifndef SO_ZEROCOPY 61#ifndef SO_ZEROCOPY
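The hunk above replaces the stale SO_EE_ORIGIN_UPAGE reference with the literal 5, the value SO_EE_ORIGIN_ZEROCOPY carries in linux/errqueue.h, so the test still builds against older uapi headers. A minimal sketch of how a consumer typically recognises zerocopy completions on the error queue, under the same fallback define (the helper name is illustrative, not part of the patch):

#include <linux/errqueue.h>

#ifndef SO_EE_ORIGIN_ZEROCOPY
#define SO_EE_ORIGIN_ZEROCOPY 5	/* matches current linux/errqueue.h */
#endif

/* Zerocopy completions are reported with ee_errno == 0 and this origin. */
static inline int is_zerocopy_notification(const struct sock_extended_err *serr)
{
	return serr->ee_errno == 0 &&
	       serr->ee_origin == SO_EE_ORIGIN_ZEROCOPY;
}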
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
index 4e00568d70c2..90cb903c3381 100755
--- a/tools/testing/selftests/net/netdevice.sh
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -178,7 +178,7 @@ if [ "$(id -u)" -ne 0 ];then
178 exit 0 178 exit 0
179fi 179fi
180 180
181ip -Version 2>/dev/null >/dev/null 181ip link show 2>/dev/null >/dev/null
182if [ $? -ne 0 ];then 182if [ $? -ne 0 ];then
183 echo "SKIP: Could not run test without the ip tool" 183 echo "SKIP: Could not run test without the ip tool"
184 exit 0 184 exit 0
diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
new file mode 100644
index 000000000000..7c5b12664b03
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_conflict.c
@@ -0,0 +1,114 @@
1/*
2 * Test for the regression introduced by
3 *
4 * b9470c27607b ("inet: kill smallest_size and smallest_port")
5 *
6 * If we open an ipv4 socket on a port with reuseaddr, we shouldn't reset the tb
7 * when we open the ipv6 counterpart, which is what was happening previously.
8 */
9#include <errno.h>
10#include <error.h>
11#include <arpa/inet.h>
12#include <netinet/in.h>
13#include <stdbool.h>
14#include <stdio.h>
15#include <sys/socket.h>
16#include <sys/types.h>
17#include <unistd.h>
18
19#define PORT 9999
20
21int open_port(int ipv6, int any)
22{
23 int fd = -1;
24 int reuseaddr = 1;
25 int v6only = 1;
26 int addrlen;
27 int ret = -1;
28 struct sockaddr *addr;
29 int family = ipv6 ? AF_INET6 : AF_INET;
30
31 struct sockaddr_in6 addr6 = {
32 .sin6_family = AF_INET6,
33 .sin6_port = htons(PORT),
34 .sin6_addr = in6addr_any
35 };
36 struct sockaddr_in addr4 = {
37 .sin_family = AF_INET,
38 .sin_port = htons(PORT),
39 .sin_addr.s_addr = any ? htonl(INADDR_ANY) : inet_addr("127.0.0.1"),
40 };
41
42
43 if (ipv6) {
44 addr = (struct sockaddr*)&addr6;
45 addrlen = sizeof(addr6);
46 } else {
47 addr = (struct sockaddr*)&addr4;
48 addrlen = sizeof(addr4);
49 }
50
51 if ((fd = socket(family, SOCK_STREAM, IPPROTO_TCP)) < 0) {
52 perror("socket");
53 goto out;
54 }
55
56 if (ipv6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&v6only,
57 sizeof(v6only)) < 0) {
58 perror("setsockopt IPV6_V6ONLY");
59 goto out;
60 }
61
62 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr,
63 sizeof(reuseaddr)) < 0) {
64 perror("setsockopt SO_REUSEADDR");
65 goto out;
66 }
67
68 if (bind(fd, addr, addrlen) < 0) {
69 perror("bind");
70 goto out;
71 }
72
73 if (any)
74 return fd;
75
76 if (listen(fd, 1) < 0) {
77 perror("listen");
78 goto out;
79 }
80 return fd;
81out:
82 close(fd);
83 return ret;
84}
85
86int main(void)
87{
88 int listenfd;
89 int fd1, fd2;
90
91 fprintf(stderr, "Opening 127.0.0.1:%d\n", PORT);
92 listenfd = open_port(0, 0);
93 if (listenfd < 0)
94 error(1, errno, "Couldn't open listen socket");
95 fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
96 fd1 = open_port(0, 1);
97 if (fd1 >= 0)
98 error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket");
99 fprintf(stderr, "Opening in6addr_any:%d\n", PORT);
100 fd1 = open_port(1, 1);
101 if (fd1 < 0)
102 error(1, errno, "Couldn't open ipv6 reuseport");
103 fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
104 fd2 = open_port(0, 1);
105 if (fd2 >= 0)
106 error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket");
107 close(fd1);
108 fprintf(stderr, "Opening INADDR_ANY:%d after closing ipv6 socket\n", PORT);
109 fd1 = open_port(0, 1);
110 if (fd1 >= 0)
111 error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
112 fprintf(stderr, "Success\n");
113 return 0;
114}
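The test above treats any successful wildcard bind as a failure; a stricter variant would also verify that the rejected bind fails specifically with EADDRINUSE. A rough, self-contained sketch under that assumption (standalone program, not part of the patch; the port and messages are illustrative):

#include <arpa/inet.h>
#include <errno.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in local = {
		.sin_family = AF_INET,
		.sin_port = htons(9999),
		.sin_addr.s_addr = inet_addr("127.0.0.1"),
	};
	struct sockaddr_in any = local;
	int one = 1;
	int listener = socket(AF_INET, SOCK_STREAM, 0);
	int probe = socket(AF_INET, SOCK_STREAM, 0);

	any.sin_addr.s_addr = htonl(INADDR_ANY);

	/* Non-wildcard listener that should block the later wildcard bind. */
	if (listener < 0 || probe < 0 ||
	    bind(listener, (struct sockaddr *)&local, sizeof(local)) ||
	    listen(listener, 1))
		return 1;

	setsockopt(probe, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
	if (bind(probe, (struct sockaddr *)&any, sizeof(any)) == 0)
		fprintf(stderr, "FAIL: wildcard bind unexpectedly succeeded\n");
	else if (errno != EADDRINUSE)
		fprintf(stderr, "FAIL: expected EADDRINUSE, got %s\n", strerror(errno));
	else
		fprintf(stderr, "OK: wildcard bind conflicted as expected\n");

	close(probe);
	close(listener);
	return 0;
}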
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 67c3e2764303..24dbf634e2dd 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -6,10 +6,18 @@
6 */ 6 */
7 7
8#include <sys/types.h> 8#include <sys/types.h>
9#include <asm/siginfo.h> 9
10#define __have_siginfo_t 1 10/*
11#define __have_sigval_t 1 11 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
12#define __have_sigevent_t 1 12 * we need to use the kernel's siginfo.h file and trick glibc
13 * into accepting it.
14 */
15#if !__GLIBC_PREREQ(2, 26)
16# include <asm/siginfo.h>
17# define __have_siginfo_t 1
18# define __have_sigval_t 1
19# define __have_sigevent_t 1
20#endif
13 21
14#include <errno.h> 22#include <errno.h>
15#include <linux/filter.h> 23#include <linux/filter.h>
@@ -884,7 +892,7 @@ TEST_F_SIGNAL(TRAP, ign, SIGSYS)
884 syscall(__NR_getpid); 892 syscall(__NR_getpid);
885} 893}
886 894
887static struct siginfo TRAP_info; 895static siginfo_t TRAP_info;
888static volatile int TRAP_nr; 896static volatile int TRAP_nr;
889static void TRAP_action(int nr, siginfo_t *info, void *void_context) 897static void TRAP_action(int nr, siginfo_t *info, void *void_context)
890{ 898{
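The __GLIBC_PREREQ check above assumes a glibc toolchain; on a libc that never defines the macro, a bare #if !__GLIBC_PREREQ(2, 26) typically fails to preprocess because the undefined function-like macro leaves stray tokens. A hedged sketch of a more defensive form of the same guard (SIGINFO_IS_COMPLETE is an illustrative name, not used by the test):

#include <features.h>

#ifdef __GLIBC_PREREQ
# if __GLIBC_PREREQ(2, 26)
#  define SIGINFO_IS_COMPLETE 1	/* glibc already exposes the SIGSYS fields */
# endif
#endif

#ifndef SIGINFO_IS_COMPLETE
/* Old glibc (or another libc): borrow the kernel's siginfo definitions. */
# include <asm/siginfo.h>
# define __have_siginfo_t 1
# define __have_sigval_t 1
# define __have_sigevent_t 1
#endif

#include <signal.h>

static siginfo_t last_info;	/* resolves with either header path */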
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index 7d406c3973ba..97bb150837df 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -39,7 +39,11 @@ void my_usr1(int sig, siginfo_t *si, void *u)
39 stack_t stk; 39 stack_t stk;
40 struct stk_data *p; 40 struct stk_data *p;
41 41
42#if __s390x__
43 register unsigned long sp asm("%15");
44#else
42 register unsigned long sp asm("sp"); 45 register unsigned long sp asm("sp");
46#endif
43 47
44 if (sp < (unsigned long)sstack || 48 if (sp < (unsigned long)sstack ||
45 sp >= (unsigned long)sstack + SIGSTKSZ) { 49 sp >= (unsigned long)sstack + SIGSTKSZ) {
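Naming the stack-pointer register is inherently per-architecture, which is why the hunk above special-cases %15 on s390x. Where exactness is not required, the address of a local variable is a compiler-portable stand-in, since it lives on whichever stack is currently active; a sketch of that alternative (helper name illustrative):

#include <stddef.h>

static int on_alt_stack(const void *sstack, size_t size)
{
	char probe;	/* placed on the currently active stack */
	unsigned long sp = (unsigned long)&probe;

	return sp >= (unsigned long)sstack &&
	       sp < (unsigned long)sstack + size;
}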
diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile
index 4981c6b6d050..8e04d0afcbd7 100644
--- a/tools/testing/selftests/sync/Makefile
+++ b/tools/testing/selftests/sync/Makefile
@@ -2,12 +2,16 @@ CFLAGS += -O2 -g -std=gnu89 -pthread -Wall -Wextra
2CFLAGS += -I../../../../usr/include/ 2CFLAGS += -I../../../../usr/include/
3LDFLAGS += -pthread 3LDFLAGS += -pthread
4 4
5TEST_PROGS = sync_test 5.PHONY: all clean
6
7all: $(TEST_PROGS)
8 6
9include ../lib.mk 7include ../lib.mk
10 8
9# The lib.mk TEST_CUSTOM_PROGS variable is for custom tests that need
10# special build rules. lib.mk will run and install them.
11
12TEST_CUSTOM_PROGS := $(OUTPUT)/sync_test
13all: $(TEST_CUSTOM_PROGS)
14
11OBJS = sync_test.o sync.o 15OBJS = sync_test.o sync.o
12 16
13TESTS += sync_alloc.o 17TESTS += sync_alloc.o
@@ -18,6 +22,16 @@ TESTS += sync_stress_parallelism.o
18TESTS += sync_stress_consumer.o 22TESTS += sync_stress_consumer.o
19TESTS += sync_stress_merge.o 23TESTS += sync_stress_merge.o
20 24
21sync_test: $(OBJS) $(TESTS) 25OBJS := $(patsubst %,$(OUTPUT)/%,$(OBJS))
26TESTS := $(patsubst %,$(OUTPUT)/%,$(TESTS))
27
28$(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS)
29 $(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS)
30
31$(OBJS): $(OUTPUT)/%.o: %.c
32 $(CC) -c $^ -o $@
33
34$(TESTS): $(OUTPUT)/%.o: %.c
35 $(CC) -c $^ -o $@
22 36
23EXTRA_CLEAN := sync_test $(OBJS) $(TESTS) 37EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS)
diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
index 9c92b7bd5641..50da45437daa 100644
--- a/tools/testing/selftests/timers/set-timer-lat.c
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -143,7 +143,8 @@ int setup_timer(int clock_id, int flags, int interval, timer_t *tm1)
143 printf("%-22s %s missing CAP_WAKE_ALARM? : [UNSUPPORTED]\n", 143 printf("%-22s %s missing CAP_WAKE_ALARM? : [UNSUPPORTED]\n",
144 clockstring(clock_id), 144 clockstring(clock_id),
145 flags ? "ABSTIME":"RELTIME"); 145 flags ? "ABSTIME":"RELTIME");
146 return 0; 146 /* Indicate timer isn't set, so caller doesn't wait */
147 return 1;
147 } 148 }
148 printf("%s - timer_create() failed\n", clockstring(clock_id)); 149 printf("%s - timer_create() failed\n", clockstring(clock_id));
149 return -1; 150 return -1;
@@ -213,8 +214,9 @@ int do_timer(int clock_id, int flags)
213 int err; 214 int err;
214 215
215 err = setup_timer(clock_id, flags, interval, &tm1); 216 err = setup_timer(clock_id, flags, interval, &tm1);
217 /* Unsupported case - return 0 to not fail the test */
216 if (err) 218 if (err)
217 return err; 219 return err == 1 ? 0 : err;
218 220
219 while (alarmcount < 5) 221 while (alarmcount < 5)
220 sleep(1); 222 sleep(1);
@@ -228,18 +230,17 @@ int do_timer_oneshot(int clock_id, int flags)
228 timer_t tm1; 230 timer_t tm1;
229 const int interval = 0; 231 const int interval = 0;
230 struct timeval timeout; 232 struct timeval timeout;
231 fd_set fds;
232 int err; 233 int err;
233 234
234 err = setup_timer(clock_id, flags, interval, &tm1); 235 err = setup_timer(clock_id, flags, interval, &tm1);
236 /* Unsupported case - return 0 to not fail the test */
235 if (err) 237 if (err)
236 return err; 238 return err == 1 ? 0 : err;
237 239
238 memset(&timeout, 0, sizeof(timeout)); 240 memset(&timeout, 0, sizeof(timeout));
239 timeout.tv_sec = 5; 241 timeout.tv_sec = 5;
240 FD_ZERO(&fds);
241 do { 242 do {
242 err = select(FD_SETSIZE, &fds, NULL, NULL, &timeout); 243 err = select(0, NULL, NULL, NULL, &timeout);
243 } while (err == -1 && errno == EINTR); 244 } while (err == -1 && errno == EINTR);
244 245
245 timer_delete(tm1); 246 timer_delete(tm1);
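select() with no descriptors, as used above, doubles as an interruptible delay: a pending timer signal wakes it with EINTR, the handler runs, and the loop resumes the remaining wait. A minimal standalone sketch of the idiom (helper name illustrative):

#include <errno.h>
#include <sys/select.h>

static void wait_up_to(long secs)
{
	struct timeval timeout = { .tv_sec = secs, .tv_usec = 0 };
	int err;

	do {
		/* On Linux, select() updates 'timeout' with the time left,
		 * so retrying after EINTR keeps the original deadline. */
		err = select(0, NULL, NULL, NULL, &timeout);
	} while (err == -1 && errno == EINTR);
}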
diff --git a/tools/testing/selftests/watchdog/Makefile b/tools/testing/selftests/watchdog/Makefile
index f863c664e3d1..ee068511fd0b 100644
--- a/tools/testing/selftests/watchdog/Makefile
+++ b/tools/testing/selftests/watchdog/Makefile
@@ -1,8 +1,3 @@
1TEST_PROGS := watchdog-test 1TEST_GEN_PROGS := watchdog-test
2
3all: $(TEST_PROGS)
4 2
5include ../lib.mk 3include ../lib.mk
6
7clean:
8 rm -fr $(TEST_PROGS)